1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27 import java.util.logging.Level;
28 import java.util.logging.Logger;
29
30 import javax.management.AttributeNotFoundException;
31
32 import org.archive.crawler.datamodel.CandidateURI;
33 import org.archive.crawler.settings.SimpleType;
34 import org.archive.util.TextUtils;
35
36 /***
37 * Rule applies configured decision to any CrawlURIs whose 'hops-path'
38 * (string like "LLXE" etc.) matches the supplied regexp.
39 *
40 * @author gojomo
41 */
42 public class HopsPathMatchesRegExpDecideRule extends PredicatedDecideRule {
43
44 private static final long serialVersionUID = -8881013912393934053L;
45
46 private static final Logger logger =
47 Logger.getLogger(HopsPathMatchesRegExpDecideRule.class.getName());
48
49 public static final String ATTR_REGEXP = "regexp";
50
51 /***
52 * Usual constructor.
53 * @param name
54 */
55 public HopsPathMatchesRegExpDecideRule(String name) {
56 super(name);
57 setDescription("HopsPathMatchesRegExpDecideRule. Applies the " +
58 "configured decision to URIs whose hops-path (string with " +
59 "L E R X P etc) matches the supplied regular expression.");
60 addElementToDefinition(new SimpleType(ATTR_REGEXP, "Java regular" +
61 "expression to match.", ""));
62 }
63
64 /***
65 * Evaluate whether given object (if CandidateURI) has hops-path
66 * matching configured regexp
67 *
68 * @param object
69 * @return true if regexp is matched
70 */
71 protected boolean evaluate(Object object) {
72 try {
73 String regexp = getRegexp(object);
74 String str = ((CandidateURI)object).getPathFromSeed();
75 boolean result = (regexp == null)?
76 false: TextUtils.matches(regexp, str);
77 if (logger.isLoggable(Level.FINE)) {
78 logger.fine("Tested '" + str + "' match with regex '" +
79 regexp + " and result was " + result);
80 }
81 return result;
82 } catch (ClassCastException e) {
83
84 return false;
85 }
86 }
87
88 /***
89 * Get the regular expression string to match the URI against.
90 *
91 * @param o the object for which the regular expression should be
92 * matched against.
93 * @return the regular expression to match against.
94 */
95 protected String getRegexp(Object o) {
96 try {
97 return (String) getAttribute(o, ATTR_REGEXP);
98 } catch (AttributeNotFoundException e) {
99 logger.severe(e.getMessage());
100 return null;
101 }
102 }
103 }