1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.deciderules;
24
25 import java.util.logging.Level;
26 import java.util.logging.Logger;
27 import javax.management.AttributeNotFoundException;
28
29 import org.archive.crawler.datamodel.CrawlURI;
30 import org.archive.crawler.settings.SimpleType;
31 import org.archive.util.TextUtils;
32
33
34 public class FetchStatusMatchesRegExpDecideRule extends PredicatedDecideRule {
35
36 private static final long serialVersionUID = -3088156729860241312L;
37
38 private final Logger logger = Logger.getLogger(this.getClass().getName());
39
40 public static final String ATTR_REGEXP = "regexp";
41
42 /***
43 * Usual constructor.
44 * @param name Name of this DecideRule.
45 */
46 public FetchStatusMatchesRegExpDecideRule(String name) {
47 super(name);
48 setDescription("FetchStatusMatchesRegExpDecideRule. Applies " +
49 "configured decision to any URI that has a fetch status matching " +
50 "the given regular expression.");
51 addElementToDefinition(new SimpleType(ATTR_REGEXP, "Java regular" +
52 "expression to match.", ""));
53 }
54
55 protected boolean evaluate(Object object) {
56 try {
57 String regexp = getRegexp(object);
58 CrawlURI curi = (CrawlURI)object;
59 String str = String.valueOf(curi.getFetchStatus());
60 boolean result = (regexp == null)?
61 false: TextUtils.matches(regexp, str);
62 if (logger.isLoggable(Level.FINE)) {
63 logger.fine("Tested '" + str + "' match with regex '" +
64 regexp + " and result was " + result);
65 }
66 return result;
67 } catch (ClassCastException e) {
68
69 return false;
70 }
71 }
72
73 /***
74 * Get the regular expression string to match the URI against.
75 *
76 * @param o the object for which the regular expression should be
77 * matched against.
78 * @return the regular expression to match against.
79 */
80 protected String getRegexp(Object o) {
81 try {
82 return (String) getAttribute(o, ATTR_REGEXP);
83 } catch (AttributeNotFoundException e) {
84 logger.severe(e.getMessage());
85 return null;
86 }
87 }
88 }