View Javadoc

1   package yarfraw.utils;
2   
3   import static yarfraw.utils.CommonConstants.MIN_PER_DAY;
4   import static yarfraw.utils.CommonConstants.MIN_PER_MONTH;
5   import static yarfraw.utils.CommonConstants.MIN_PER_WEEK;
6   import static yarfraw.utils.CommonConstants.MIN_PER_YEAR;
7   
8   import java.math.BigInteger;
9   import java.text.ParseException;
10  import java.text.SimpleDateFormat;
11  import java.util.Date;
12  
13  import org.apache.commons.lang.StringUtils;
14  import org.apache.commons.logging.Log;
15  import org.apache.commons.logging.LogFactory;
16  
17  import yarfraw.core.datamodel.FeedFormat;
18  import yarfraw.core.datamodel.YarfrawException;
19  import yarfraw.generated.rss10.elements.UpdatePeriodEnum;
20  
21  /***
22   * Utilities methods.
23   * 
24   * @author jliang
25   *
26   */
27  public class CommonUtils{
28    private static final Log LOG = LogFactory.getLog(CommonUtils.class);
29    
30    public static final String RSS20_JAXB_CONTEXT = "yarfraw.generated.rss20.elements";
31    public static final String RSS10_JAXB_CONTEXT = "yarfraw.generated.rss10.elements";
32    public static final String ATOM10_JAXB_CONTEXT = "yarfraw.generated.atom10.elements";
33    public static final String ATOM03_JAXB_CONTEXT = "yarfraw.generated.atom03.elements";
34    
35    /////////////////////DATE PARSING///////////////////////////////////
36    public static final String RFC822DATE_PATTERN = "EEE, dd MMM yyyy HH:mm:ss zzz";
37    public static final String ISO8601DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ";
38    public static final String ISO_8601_LVL1_PATTERN = "yyyy";
39    public static final String ISO_8601_LVL2_PATTERN = "yyyy-MM";
40    public static final String ISO_8601_LVL3_PATTERN = "yyyy-MM-dd";
41    public static final String ISO_8601_LVL4_PATTERN = "yyyy-MM-dd'T'HH:mmZ";
42    public static final String ISO_8601_LVL5_PATTERN = "yyyy-MM-dd'T'HH:mm:ssZ";
43    public static final String ISO_8601_LVL6_PATTERN = "yyyy-MM-dd'T'HH:mm:ss.sZ";
44    
45    public static final String NON_STANDARD_PATTERN1 = "EEE, dd MMM yyyy HH:mm:ss z";
46    public static final String NON_STANDARD_PATTERN2 = "EEE, dd MMM yyyy HH:mm zzzz";
47    public static final String NON_STANDARD_PATTERN3 = "EEE, dd MMM yy HH:mm:ss z";
48    public static final String NON_STANDARD_PATTERN4 = "yyyy-MM-dd'T'HH:mm:ss.SSSzzzz";
49    public static final String NON_STANDARD_PATTERN5 = "yyyy-MM-dd'T'HH:mm:sszzzz";
50    public static final String NON_STANDARD_PATTERN6 = "yyyy-MM-dd'T'HH:mm:ss";
51    public static final String NON_STANDARD_PATTERN7 = "yyyy-MM-dd'T'HH:mm:ss.sZ";
52    
53    
54    //6 level of ISO 8601 Date
55    private static final SimpleDateFormat ISO_8601_LVL1 = new SimpleDateFormat(ISO_8601_LVL1_PATTERN);
56    private static final SimpleDateFormat ISO_8601_LVL2 = new SimpleDateFormat(ISO_8601_LVL2_PATTERN);
57    private static final SimpleDateFormat ISO_8601_LVL3 = new SimpleDateFormat(ISO_8601_LVL3_PATTERN);
58    private static final SimpleDateFormat ISO_8601_LVL4 = new SimpleDateFormat(ISO_8601_LVL4_PATTERN);
59    private static final SimpleDateFormat ISO_8601_LVL5 = new SimpleDateFormat(ISO_8601_LVL5_PATTERN);
60    private static final SimpleDateFormat ISO_8601_LVL6 = new SimpleDateFormat(ISO_8601_LVL6_PATTERN);
61    
62    private static final SimpleDateFormat RFC822DATE_FORMAT = new SimpleDateFormat(RFC822DATE_PATTERN);
63  
64    private static final SimpleDateFormat[] RFC_FORMAT = new SimpleDateFormat[]{
65      RFC822DATE_FORMAT, 
66      new SimpleDateFormat(NON_STANDARD_PATTERN1),
67      new SimpleDateFormat(NON_STANDARD_PATTERN2),
68      new SimpleDateFormat(NON_STANDARD_PATTERN3)
69    };
70    
71    private static final SimpleDateFormat[] NON_STANDARD_ISO_FORMAT = new SimpleDateFormat[]{
72      new SimpleDateFormat(NON_STANDARD_PATTERN4),
73      new SimpleDateFormat(NON_STANDARD_PATTERN5),
74      new SimpleDateFormat(NON_STANDARD_PATTERN6),
75      new SimpleDateFormat(NON_STANDARD_PATTERN7)
76    };
77  
78    private CommonUtils(){}
79  
80    /***
81     * Determines whether the input <code>dateString</code> is valid based on the input <code>FeedFormat</code>.
82     * @param dateString
83     * @param format
84     * @return
85     */
86    public static synchronized boolean isDateFormatValid(String dateString, FeedFormat format){
87      if(format == FeedFormat.ATOM10 || format == FeedFormat.RSS10 || format == FeedFormat.ATOM03){
88        try {
89          return tryParseISODate(dateString) != null ;
90        } catch (Exception e) {
91          //non strict ISO format
92          return tryParseNonStandardIsoDates(dateString) != null;
93        }
94      }else if(format == FeedFormat.RSS20){
95        try{
96          return tryParseRfcDates(dateString) != null;
97        } catch (Exception e) {
98          return false;
99        }
100     }else{
101       throw new IllegalArgumentException("Unsupported format: "+ format);
102     }
103   }
104   
105   /***
106    * Remove last occurrence of the character c in s
107    */
108   private static String removeLast(String s, char c){
109     int idx = s.lastIndexOf(c);
110     if(idx < 0){
111       return s;//nothing to remove
112     }
113     return s.substring(0, idx)+ (idx==s.length()-1? StringUtils.EMPTY: s.substring(idx+1));
114   }
115 
116 
117   /***
118    * Parse a date string using both ISO and RFC formats and some non standard formats.
119    * 
120    * @param dateString
121    * @return {@link Date} representation of the input string. null if
122    * unable to parse input string.
123    */
124   public synchronized static Date tryParseDate(String dateString){
125     Date ret = null;
126     try {
127       ret = tryParseISODate(dateString);
128       return ret;
129     } catch (Exception e) {
130       ret = tryParseNonStandardIsoDates(dateString);
131       if(ret != null){
132         return ret;
133       }else{
134         ret = tryParseRfcDates(dateString);
135       }
136     }
137     if(ret == null){
138       LOG.warn("Unparsable dateString "+dateString+", returning null");
139     }
140     return ret;
141   }
142 
143   /***
144    * Try to parse using rfc format
145    * @param dateString
146    * @return
147    */
148   private static Date tryParseRfcDates(String dateString){
149     Date ret = null;
150     for(SimpleDateFormat format : RFC_FORMAT){
151       try {
152         ret = format.parse(dateString);
153         return ret;
154       } catch (Exception ee) {
155         //keep trying
156       }
157     }
158     return ret;
159   }
160   
161   /***
162    * Try to parse using non standard ISO formats
163    * @param dateString
164    * @return
165    */
166   private static Date tryParseNonStandardIsoDates(String dateString){
167     Date ret = null;
168     for(SimpleDateFormat format : NON_STANDARD_ISO_FORMAT){
169       try {
170         ret = format.parse(dateString);
171         return ret;
172       } catch (Exception ee) {
173         //keep trying
174       }
175     }
176     return ret;
177   }
178   
179   /***
180    * Format a {@link Date} object to string based on the input {@link FeedFormat}.
181    * For Atom 1.0 and RSS 1.0 it will be formatted as ISO8601 Level 5 string. <br/>
182    * http://www.w3.org/TR/NOTE-datetime
183    * <br/>
184    * For RSS 2.0, it will be formatted as RFC 822 date string.
185    * <br/>
186    * http://www.faqs.org/rfcs/rfc822.html
187    * @param date any date 
188    * @param format any {@link FeedFormat}
189    * @return null if either input is null. Formatted date string otherwise.
190    */
191   public static synchronized String formatDate(Date date, FeedFormat format){
192     if(date == null || format == null){
193       return null;
194     }
195     if(format == FeedFormat.ATOM10 || format == FeedFormat.RSS20 || format == FeedFormat.ATOM03){
196       return getDateAsISO8601String(date);
197     }else if(format == FeedFormat.RSS10 ){
198       return RFC822DATE_FORMAT.format(date);
199     }else{
200       throw new IllegalArgumentException("Unsupported format: "+ format);
201     }
202   }
203   
204   /***
205    * Simple date format does not support ISO0861 date, so i have to do some hacking
206    * <br/>
207    * return null if date is null
208    */
209   private static String getDateAsISO8601String(Date date)
210   { 
211     if(date == null){
212       return null;
213     }
214     String result = ISO_8601_LVL5.format(date);
215     //convert YYYYMMDDTHH:mm:ss+HH00 into YYYYMMDDTHH:mm:ss+HH:00
216     //- note the added colon for the Timezone
217     result = result.substring(0, result.length()-2)
218       + ":" + result.substring(result.length()-2);
219     return result;
220   }
221   
222   /***
223    * Try to parse a date string using different formatting string.
224    * <br/>
225    * return null if dateString is null
226    * @throws YarfrawException 
227    * @throws ParseException 
228    */
229   private synchronized static Date tryParseISODate(String dateString) throws YarfrawException, ParseException{
230     
231     if(dateString == null){
232       return null;
233     }else if(dateString.length() == 4){
234       return ISO_8601_LVL1.parse(dateString);
235     }else if(dateString.length() == 7){
236       return ISO_8601_LVL2.parse(dateString);
237     }else if(dateString.length() == 10){
238       return ISO_8601_LVL3.parse(dateString);
239     }else if(dateString.length() == 22){
240       return ISO_8601_LVL4.parse(removeLast(dateString, ':'));
241     }else if(dateString.length() == 25){
242       return ISO_8601_LVL5.parse(removeLast(dateString, ':'));
243     }else if(dateString.length() == 28){
244       return ISO_8601_LVL6.parse(removeLast(dateString, ':'));
245     }else{
246       throw new YarfrawException("Invalid ISO 8601 Date format: "+dateString);
247     }
248   }
249   
250 /////////////////////DATE PARSING///////////////////////////////////
251   
252   
253   /***
254    * calculate the ttl value from updatePeriod and updateFrequency
255    * @return null if anything unexpcted occurs
256    */
257   public static Integer calculateTtl(UpdatePeriodEnum updatePeriod, BigInteger updateFrequency){
258     if(updatePeriod == null && updateFrequency == null){
259       return null;
260     }
261     int freq = updateFrequency == null ? 1: updateFrequency.intValue();
262     if(updatePeriod == UpdatePeriodEnum.HOURLY){
263       return Math.max(1, 60/freq);
264     }else if(updatePeriod == UpdatePeriodEnum.DAILY){
265       return Math.max(1, MIN_PER_DAY/freq);
266     }else if(updatePeriod == UpdatePeriodEnum.MONTHLY){
267       return Math.max(1, MIN_PER_MONTH/freq);
268     }else if(updatePeriod == UpdatePeriodEnum.WEEKLY){
269       return Math.max(1, MIN_PER_WEEK/freq);
270     }else if(updatePeriod == UpdatePeriodEnum.YEARLY){
271       return Math.max(1, MIN_PER_YEAR/freq);
272     }else{
273       return null;
274     }
275   }
276 }