1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.archive.util;
28
29 import java.io.BufferedReader;
30 import java.io.IOException;
31 import java.io.InputStream;
32 import java.io.InputStreamReader;
33 import java.io.PrintWriter;
34 import java.io.StringWriter;
35 import java.text.NumberFormat;
36 import java.text.ParseException;
37 import java.text.SimpleDateFormat;
38 import java.util.Calendar;
39 import java.util.Date;
40 import java.util.GregorianCalendar;
41 import java.util.HashSet;
42 import java.util.Locale;
43 import java.util.Set;
44 import java.util.TimeZone;
45 import java.util.logging.Level;
46 import java.util.logging.Logger;
47
48 import org.apache.commons.io.IOUtils;
49
50 /***
51 * Miscellaneous useful methods.
52 *
53 * @contributor gojomo & others
54 */
55 public class ArchiveUtils {
56 private static final Logger LOGGER = Logger.getLogger(ArchiveUtils.class.getName());
57
58 /***
59 * Arc-style date stamp in the format yyyyMMddHHmm and UTC time zone.
60 */
61 private static final ThreadLocal<SimpleDateFormat>
62 TIMESTAMP12 = threadLocalDateFormat("yyyyMMddHHmm");;
63
64 /***
65 * Arc-style date stamp in the format yyyyMMddHHmmss and UTC time zone.
66 */
67 private static final ThreadLocal<SimpleDateFormat>
68 TIMESTAMP14 = threadLocalDateFormat("yyyyMMddHHmmss");
69 /***
70 * Arc-style date stamp in the format yyyyMMddHHmmssSSS and UTC time zone.
71 */
72 private static final ThreadLocal<SimpleDateFormat>
73 TIMESTAMP17 = threadLocalDateFormat("yyyyMMddHHmmssSSS");
74
75 /***
76 * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
77 * UTC time zone is assumed.
78 */
79 private static final ThreadLocal<SimpleDateFormat>
80 TIMESTAMP17ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
81
82 /***
83 * Log-style date stamp in the format yyyy-MM-dd'T'HH:mm:ss'Z'
84 * UTC time zone is assumed.
85 */
86 private static final ThreadLocal<SimpleDateFormat>
87 TIMESTAMP14ISO8601Z = threadLocalDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
88
89 /***
90 * Default character to use padding strings.
91 */
92 private static final char DEFAULT_PAD_CHAR = ' ';
93
94 /*** milliseconds in an hour */
95 private static final int HOUR_IN_MS = 60 * 60 * 1000;
96 /*** milliseconds in a day */
97 private static final int DAY_IN_MS = 24 * HOUR_IN_MS;
98
99 private static ThreadLocal<SimpleDateFormat> threadLocalDateFormat(final String pattern) {
100 ThreadLocal<SimpleDateFormat> tl = new ThreadLocal<SimpleDateFormat>() {
101 protected SimpleDateFormat initialValue() {
102 SimpleDateFormat df = new SimpleDateFormat(pattern);
103 df.setTimeZone(TimeZone.getTimeZone("GMT"));
104 return df;
105 }
106 };
107 return tl;
108 }
109
110 public static int MAX_INT_CHAR_WIDTH =
111 Integer.toString(Integer.MAX_VALUE).length();
112
113 /***
114 * Utility function for creating arc-style date stamps
115 * in the format yyyMMddHHmmssSSS.
116 * Date stamps are in the UTC time zone
117 * @return the date stamp
118 */
119 public static String get17DigitDate(){
120 return TIMESTAMP17.get().format(new Date());
121 }
122
123 /***
124 * Utility function for creating arc-style date stamps
125 * in the format yyyMMddHHmmss.
126 * Date stamps are in the UTC time zone
127 * @return the date stamp
128 */
129 public static String get14DigitDate(){
130 return TIMESTAMP14.get().format(new Date());
131 }
132
133 /***
134 * Utility function for creating arc-style date stamps
135 * in the format yyyMMddHHmm.
136 * Date stamps are in the UTC time zone
137 * @return the date stamp
138 */
139 public static String get12DigitDate(){
140 return TIMESTAMP12.get().format(new Date());
141 }
142
143 /***
144 * Utility function for creating log timestamps, in
145 * W3C/ISO8601 format, assuming UTC. Use current time.
146 *
147 * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
148 *
149 * @return the date stamp
150 */
151 public static String getLog17Date(){
152 return TIMESTAMP17ISO8601Z.get().format(new Date());
153 }
154
155 /***
156 * Utility function for creating log timestamps, in
157 * W3C/ISO8601 format, assuming UTC.
158 *
159 * Format is yyyy-MM-dd'T'HH:mm:ss.SSS'Z'
160 * @param date Date to format.
161 *
162 * @return the date stamp
163 */
164 public static String getLog17Date(long date){
165 return TIMESTAMP17ISO8601Z.get().format(new Date(date));
166 }
167
168 /***
169 * Utility function for creating log timestamps, in
170 * W3C/ISO8601 format, assuming UTC. Use current time.
171 *
172 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
173 *
174 * @return the date stamp
175 */
176 public static String getLog14Date(){
177 return TIMESTAMP14ISO8601Z.get().format(new Date());
178 }
179
180 /***
181 * Utility function for creating log timestamps, in
182 * W3C/ISO8601 format, assuming UTC.
183 *
184 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
185 * @param date long timestamp to format.
186 *
187 * @return the date stamp
188 */
189 public static String getLog14Date(long date){
190 return TIMESTAMP14ISO8601Z.get().format(new Date(date));
191 }
192
193 /***
194 * Utility function for creating log timestamps, in
195 * W3C/ISO8601 format, assuming UTC.
196 *
197 * Format is yyyy-MM-dd'T'HH:mm:ss'Z'
198 * @param date Date to format.
199 *
200 * @return the date stamp
201 */
202 public static String getLog14Date(Date date){
203 return TIMESTAMP14ISO8601Z.get().format(date);
204 }
205
206 /***
207 * Utility function for creating arc-style date stamps
208 * in the format yyyyMMddHHmmssSSS.
209 * Date stamps are in the UTC time zone
210 *
211 * @param date milliseconds since epoc
212 * @return the date stamp
213 */
214 public static String get17DigitDate(long date){
215 return TIMESTAMP17.get().format(new Date(date));
216 }
217
218 public static String get17DigitDate(Date date){
219 return TIMESTAMP17.get().format(date);
220 }
221
222 /***
223 * Utility function for creating arc-style date stamps
224 * in the format yyyyMMddHHmmss.
225 * Date stamps are in the UTC time zone
226 *
227 * @param date milliseconds since epoc
228 * @return the date stamp
229 */
230 public static String get14DigitDate(long date){
231 return TIMESTAMP14.get().format(new Date(date));
232 }
233
234 public static String get14DigitDate(Date d) {
235 return TIMESTAMP14.get().format(d);
236 }
237
238 /***
239 * Utility function for creating arc-style date stamps
240 * in the format yyyyMMddHHmm.
241 * Date stamps are in the UTC time zone
242 *
243 * @param date milliseconds since epoc
244 * @return the date stamp
245 */
246 public static String get12DigitDate(long date){
247 return TIMESTAMP12.get().format(new Date(date));
248 }
249
250 public static String get12DigitDate(Date d) {
251 return TIMESTAMP12.get().format(d);
252 }
253
254 /***
255 * Parses an ARC-style date. If passed String is < 12 characters in length,
256 * we pad. At a minimum, String should contain a year (>=4 characters).
257 * Parse will also fail if day or month are incompletely specified. Depends
258 * on the above getXXDigitDate methods.
259 * @param A 4-17 digit date in ARC style (<code>yyyy</code> to
260 * <code>yyyyMMddHHmmssSSS</code>) formatting.
261 * @return A Date object representing the passed String.
262 * @throws ParseException
263 */
264 public static Date getDate(String d) throws ParseException {
265 Date date = null;
266 if (d == null) {
267 throw new IllegalArgumentException("Passed date is null");
268 }
269 switch (d.length()) {
270 case 14:
271 date = ArchiveUtils.parse14DigitDate(d);
272 break;
273
274 case 17:
275 date = ArchiveUtils.parse17DigitDate(d);
276 break;
277
278 case 12:
279 date = ArchiveUtils.parse12DigitDate(d);
280 break;
281
282 case 0:
283 case 1:
284 case 2:
285 case 3:
286 throw new ParseException("Date string must at least contain a" +
287 "year: " + d, d.length());
288
289 default:
290 if (!(d.startsWith("19") || d.startsWith("20"))) {
291 throw new ParseException("Unrecognized century: " + d, 0);
292 }
293 if (d.length() < 8 && (d.length() % 2) != 0) {
294 throw new ParseException("Incomplete month/date: " + d,
295 d.length());
296 }
297 StringBuilder sb = new StringBuilder(d);
298 if (sb.length() < 8) {
299 for (int i = sb.length(); sb.length() < 8; i += 2) {
300 sb.append("01");
301 }
302 }
303 if (sb.length() < 12) {
304 for (int i = sb.length(); sb.length() < 12; i++) {
305 sb.append("0");
306 }
307 }
308 date = ArchiveUtils.parse12DigitDate(sb.toString());
309 }
310
311 return date;
312 }
313
314 /***
315 * Utility function for parsing arc-style date stamps
316 * in the format yyyMMddHHmmssSSS.
317 * Date stamps are in the UTC time zone. The whole string will not be
318 * parsed, only the first 17 digits.
319 *
320 * @param date an arc-style formatted date stamp
321 * @return the Date corresponding to the date stamp string
322 * @throws ParseException if the inputstring was malformed
323 */
324 public static Date parse17DigitDate(String date) throws ParseException {
325 return TIMESTAMP17.get().parse(date);
326 }
327
328 /***
329 * Utility function for parsing arc-style date stamps
330 * in the format yyyMMddHHmmss.
331 * Date stamps are in the UTC time zone. The whole string will not be
332 * parsed, only the first 14 digits.
333 *
334 * @param date an arc-style formatted date stamp
335 * @return the Date corresponding to the date stamp string
336 * @throws ParseException if the inputstring was malformed
337 */
338 public static Date parse14DigitDate(String date) throws ParseException{
339 return TIMESTAMP14.get().parse(date);
340 }
341
342 /***
343 * Utility function for parsing arc-style date stamps
344 * in the format yyyMMddHHmm.
345 * Date stamps are in the UTC time zone. The whole string will not be
346 * parsed, only the first 12 digits.
347 *
348 * @param date an arc-style formatted date stamp
349 * @return the Date corresponding to the date stamp string
350 * @throws ParseException if the inputstring was malformed
351 */
352 public static Date parse12DigitDate(String date) throws ParseException{
353 return TIMESTAMP12.get().parse(date);
354 }
355
356 /***
357 * Convert 17-digit date format timestamps (as found in crawl.log, for
358 * example) into a GregorianCalendar object. + * Useful so you can convert
359 * into milliseconds-since-epoch. Note: it is possible to compute
360 * milliseconds-since-epoch + * using {@link #parse17DigitDate}.UTC(), but
361 * that method is deprecated in favor of using Calendar.getTimeInMillis(). + *
362 * <p/>I probably should have dug into all the utility methods in
363 * DateFormat.java to parse the timestamp, but this was + * easier. If
364 * someone wants to fix this to use those methods, please have at it! <p/>
365 * Mike Schwartz, schwartz at CodeOnTheRoad dot com.
366 *
367 * @param timestamp17String
368 * @return Calendar set to <code>timestamp17String</code>.
369 */
370 public static Calendar timestamp17ToCalendar(String timestamp17String) {
371 GregorianCalendar calendar = new GregorianCalendar();
372 int year = Integer.parseInt(timestamp17String.substring(0, 4));
373 int dayOfMonth = Integer.parseInt(timestamp17String.substring(6, 8));
374
375 int month = Integer.parseInt(timestamp17String.substring(4, 6)) - 1;
376 int hourOfDay = Integer.parseInt(timestamp17String.substring(8, 10));
377 int minute = Integer.parseInt(timestamp17String.substring(10, 12));
378 int second = Integer.parseInt(timestamp17String.substring(12, 14));
379 int milliseconds = Integer
380 .parseInt(timestamp17String.substring(14, 17));
381 calendar.set(Calendar.YEAR, year);
382 calendar.set(Calendar.MONTH, month);
383 calendar.set(Calendar.DAY_OF_MONTH, dayOfMonth);
384 calendar.set(Calendar.HOUR_OF_DAY, hourOfDay);
385 calendar.set(Calendar.MINUTE, minute);
386 calendar.set(Calendar.SECOND, second);
387 calendar.set(Calendar.MILLISECOND, milliseconds);
388 return calendar;
389 }
390
391 /***
392 * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
393 * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
394 * @return Seconds since the epoch as a string zero-pre-padded so always
395 * Integer.MAX_VALUE wide (Makes it so sorting of resultant string works
396 * properly).
397 * @throws ParseException
398 */
399 public static String secondsSinceEpoch(String timestamp)
400 throws ParseException {
401 return zeroPadInteger((int)
402 (getSecondsSinceEpoch(timestamp).getTime()/1000));
403 }
404
405 /***
406 * @param timestamp A 14-digit timestamp or the suffix for a 14-digit
407 * timestamp: E.g. '20010909014640' or '20010101' or '1970'.
408 * @return A date.
409 * @see #secondsSinceEpoch(String)
410 * @throws ParseException
411 */
412 public static Date getSecondsSinceEpoch(String timestamp)
413 throws ParseException {
414 if (timestamp.length() < 14) {
415 if (timestamp.length() < 10 && (timestamp.length() % 2) == 1) {
416 throw new IllegalArgumentException("Must have year, " +
417 "month, date, hour or second granularity: " + timestamp);
418 }
419 if (timestamp.length() == 4) {
420
421 timestamp = timestamp + "01010000";
422 }
423 if (timestamp.length() == 6) {
424
425 timestamp = timestamp + "010000";
426 }
427 if (timestamp.length() < 14) {
428 timestamp = timestamp +
429 ArchiveUtils.padTo("", 14 - timestamp.length(), '0');
430 }
431 }
432 return ArchiveUtils.parse14DigitDate(timestamp);
433 }
434
435 /***
436 * @param i Integer to add prefix of zeros too. If passed
437 * 2005, will return the String <code>0000002005</code>. String
438 * width is the width of Integer.MAX_VALUE as a string (10
439 * digits).
440 * @return Padded String version of <code>i</code>.
441 */
442 public static String zeroPadInteger(int i) {
443 return ArchiveUtils.padTo(Integer.toString(i),
444 MAX_INT_CHAR_WIDTH, '0');
445 }
446
447 /***
448 * Convert an <code>int</code> to a <code>String</code>, and pad it to
449 * <code>pad</code> spaces.
450 * @param i the int
451 * @param pad the width to pad to.
452 * @return String w/ padding.
453 */
454 public static String padTo(final int i, final int pad) {
455 String n = Integer.toString(i);
456 return padTo(n, pad);
457 }
458
459 /***
460 * Pad the given <code>String</code> to <code>pad</code> characters wide
461 * by pre-pending spaces. <code>s</code> should not be <code>null</code>.
462 * If <code>s</code> is already wider than <code>pad</code> no change is
463 * done.
464 *
465 * @param s the String to pad
466 * @param pad the width to pad to.
467 * @return String w/ padding.
468 */
469 public static String padTo(final String s, final int pad) {
470 return padTo(s, pad, DEFAULT_PAD_CHAR);
471 }
472
473 /***
474 * Pad the given <code>String</code> to <code>pad</code> characters wide
475 * by pre-pending <code>padChar</code>.
476 *
477 * <code>s</code> should not be <code>null</code>. If <code>s</code> is
478 * already wider than <code>pad</code> no change is done.
479 *
480 * @param s the String to pad
481 * @param pad the width to pad to.
482 * @param padChar The pad character to use.
483 * @return String w/ padding.
484 */
485 public static String padTo(final String s, final int pad,
486 final char padChar) {
487 String result = s;
488 int l = s.length();
489 if (l < pad) {
490 StringBuffer sb = new StringBuffer(pad);
491 while(l < pad) {
492 sb.append(padChar);
493 l++;
494 }
495 sb.append(s);
496 result = sb.toString();
497 }
498 return result;
499 }
500
501 /*** check that two byte arrays are equal. They may be <code>null</code>.
502 *
503 * @param lhs a byte array
504 * @param rhs another byte array.
505 * @return <code>true</code> if they are both equal (or both
506 * <code>null</code>)
507 */
508 public static boolean byteArrayEquals(final byte[] lhs, final byte[] rhs) {
509 if (lhs == null && rhs != null || lhs != null && rhs == null) {
510 return false;
511 }
512 if (lhs==rhs) {
513 return true;
514 }
515 if (lhs.length != rhs.length) {
516 return false;
517 }
518 for(int i = 0; i<lhs.length; i++) {
519 if (lhs[i]!=rhs[i]) {
520 return false;
521 }
522 }
523 return true;
524 }
525
526 /***
527 * Converts a double to a string.
528 * @param val The double to convert
529 * @param precision How many characters to include after '.'
530 * @return the double as a string.
531 */
532 public static String doubleToString(double val, int maxFractionDigits){
533 return doubleToString(val, maxFractionDigits, 0);
534 }
535
536 private static String doubleToString(double val, int maxFractionDigits, int minFractionDigits) {
537 NumberFormat f = NumberFormat.getNumberInstance(Locale.US);
538 f.setMaximumFractionDigits(maxFractionDigits);
539 f.setMinimumFractionDigits(minFractionDigits);
540 return f.format(val);
541 }
542
543 /***
544 * Takes a byte size and formats it for display with 'friendly' units.
545 * <p>
546 * This involves converting it to the largest unit
547 * (of B, KB, MB, GB, TB) for which the amount will be > 1.
548 * <p>
549 * Additionally, at least 2 significant digits are always displayed.
550 * <p>
551 * Displays as bytes (B): 0-1023
552 * Displays as kilobytes (KB): 1024 - 2097151 (~2Mb)
553 * Displays as megabytes (MB): 2097152 - 4294967295 (~4Gb)
554 * Displays as gigabytes (GB): 4294967296 - infinity
555 * <p>
556 * Negative numbers will be returned as '0 B'.
557 *
558 * @param amount the amount of bytes
559 * @return A string containing the amount, properly formated.
560 */
561 public static String formatBytesForDisplay(long amount) {
562 double displayAmount = (double) amount;
563 int unitPowerOf1024 = 0;
564
565 if(amount <= 0){
566 return "0 B";
567 }
568
569 while(displayAmount>=1024 && unitPowerOf1024 < 4) {
570 displayAmount = displayAmount / 1024;
571 unitPowerOf1024++;
572 }
573
574
575 final String[] units = { " B", " KB", " MB", " GB", " TB" };
576
577
578 int fractionDigits = (displayAmount < 10) ? 1 : 0;
579 return doubleToString(displayAmount, fractionDigits, fractionDigits)
580 + units[unitPowerOf1024];
581 }
582
583 /***
584 * Convert milliseconds value to a human-readable duration
585 * @param time
586 * @return Human readable string version of passed <code>time</code>
587 */
588 public static String formatMillisecondsToConventional(long time) {
589 return formatMillisecondsToConventional(time,true);
590 }
591
592 /***
593 * Convert milliseconds value to a human-readable duration
594 * @param time
595 * @param toMs whether to print to the ms
596 * @return Human readable string version of passed <code>time</code>
597 */
598 public static String formatMillisecondsToConventional(long time, boolean toMs) {
599 StringBuffer sb = new StringBuffer();
600 if(time<0) {
601 sb.append("-");
602 }
603 long absTime = Math.abs(time);
604 if(!toMs && absTime < 1000) {
605 return "0s";
606 }
607 if(absTime > DAY_IN_MS) {
608
609 sb.append(absTime / DAY_IN_MS + "d");
610 absTime = absTime % DAY_IN_MS;
611 }
612 if (absTime > HOUR_IN_MS) {
613
614 sb.append(absTime / HOUR_IN_MS + "h");
615 absTime = absTime % HOUR_IN_MS;
616 }
617 if (absTime > 60000) {
618 sb.append(absTime / 60000 + "m");
619 absTime = absTime % 60000;
620 }
621 if (absTime > 1000) {
622 sb.append(absTime / 1000 + "s");
623 absTime = absTime % 1000;
624 }
625 if(toMs) {
626 sb.append(absTime + "ms");
627 }
628 return sb.toString();
629 }
630
631
632 /***
633 * Generate a long UID based on the given class and version number.
634 * Using this instead of the default will assume serialization
635 * compatibility across class changes unless version number is
636 * intentionally bumped.
637 *
638 * @param class1
639 * @param version
640 * @return UID based off class and version number.
641 */
642 public static long classnameBasedUID(Class<?> class1, int version) {
643 String callingClassname = class1.getName();
644 return (long)callingClassname.hashCode() << 32 + version;
645 }
646
647 /***
648 * Copy the raw bytes of a long into a byte array, starting at
649 * the specified offset.
650 *
651 * @param l
652 * @param array
653 * @param offset
654 */
655 public static void longIntoByteArray(long l, byte[] array, int offset) {
656 int i, shift;
657
658 for(i = 0, shift = 56; i < 8; i++, shift -= 8)
659 array[offset+i] = (byte)(0xFF & (l >> shift));
660 }
661
662 public static long byteArrayIntoLong(byte [] bytearray) {
663 return byteArrayIntoLong(bytearray, 0);
664 }
665
666 /***
667 * Byte array into long.
668 * @param bytearray Array to convert to a long.
669 * @param offset Offset into array at which we start decoding the long.
670 * @return Long made of the bytes of <code>array</code> beginning at
671 * offset <code>offset</code>.
672 * @see #longIntoByteArray(long, byte[], int)
673 */
674 public static long byteArrayIntoLong(byte [] bytearray,
675 int offset) {
676 long result = 0;
677 for (int i = offset; i < 8
678 result = (result << 8
679 (0xff & (byte)(bytearray[i] & 0xff));
680 }
681 return result;
682 }
683
684 /***
685 * Given a string that may be a plain host or host/path (without
686 * URI scheme), add an implied http:// if necessary.
687 *
688 * @param u string to evaluate
689 * @return string with http:// added if no scheme already present
690 */
691 public static String addImpliedHttpIfNecessary(String u) {
692 if(u.indexOf(':') == -1 || u.indexOf('.') < u.indexOf(':')) {
693
694 u = "http://" + u;
695 }
696 return u;
697 }
698
699 /***
700 * Verify that the array begins with the prefix.
701 *
702 * @param array
703 * @param prefix
704 * @return true if array is identical to prefix for the first prefix.length
705 * positions
706 */
707 public static boolean startsWith(byte[] array, byte[] prefix) {
708 if(prefix.length>array.length) {
709 return false;
710 }
711 for(int i = 0; i < prefix.length; i++) {
712 if(array[i]!=prefix[i]) {
713 return false;
714 }
715 }
716 return true;
717 }
718
719 /***
720 * Utility method to get a String singleLineReport from Reporter
721 * @param rep Reporter to get singleLineReport from
722 * @return String of report
723 */
724 public static String singleLineReport(Reporter rep) {
725 StringWriter sw = new StringWriter();
726 PrintWriter pw = new PrintWriter(sw);
727 try {
728 rep.singleLineReportTo(pw);
729 } catch (IOException e) {
730
731 e.printStackTrace();
732 }
733 pw.flush();
734 return sw.toString();
735 }
736
737 /***
738 * Compose the requested report into a String. DANGEROUS IF REPORT
739 * CAN BE LARGE.
740 *
741 * @param rep Reported
742 * @param name String name of report to compose
743 * @return String of report
744 */
745 public static String writeReportToString(Reporter rep, String name) {
746 StringWriter sw = new StringWriter();
747 PrintWriter pw = new PrintWriter(sw);
748 rep.reportTo(name,pw);
749 pw.flush();
750 return sw.toString();
751 }
752
/**
 * Lower-cased top-level-domain names, filled once at class load from
 * the classpath resource {@code tlds-alpha-by-domain.txt}. Remains
 * empty if the resource cannot be read.
 */
public static Set<String> TLDS;

static {
    TLDS = new HashSet<String>();
    InputStream is = ArchiveUtils.class.getResourceAsStream("tlds-alpha-by-domain.txt");
    try {
        if (is == null) {
            // Report a missing resource explicitly rather than via a
            // NullPointerException from the reader.
            LOGGER.log(Level.SEVERE,
                    "TLD list unavailable: tlds-alpha-by-domain.txt not found");
        } else {
            // Explicit charset so loading does not depend on the
            // platform default encoding.
            BufferedReader reader =
                new BufferedReader(new InputStreamReader(is, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                String entry = line.trim();
                if (entry.length() == 0 || entry.startsWith("#")) {
                    // Skip blank lines and comments.
                    continue;
                }
                // Locale-independent lower-casing: the default locale
                // may map 'I' to a dotless i (e.g. Turkish).
                TLDS.add(entry.toLowerCase(Locale.ENGLISH));
            }
        }
    } catch (Exception e) {
        // Best effort: the class must still load without the list.
        LOGGER.log(Level.SEVERE,"TLD list unavailable",e);
    } finally {
        IOUtils.closeQuietly(is);
    }
}

/**
 * Returns whether the given string represents a known
 * top-level-domain (like "com", "org", etc.), i.e. whether its
 * lower-cased form is contained in {@link #TLDS}. The comparison is
 * case-insensitive and independent of the default locale.
 *
 * @param dom candidate string; must not be {@code null}
 * @return boolean true if recognized as TLD
 */
public static boolean isTld(String dom) {
    return TLDS.contains(dom.toLowerCase(Locale.ENGLISH));
}
784 }
785