1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27 import java.util.logging.Level;
28 import java.util.logging.Logger;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.util.TextUtils;
32
33
34
/**
 * Rule applies the configured decision to any CrawlURI whose class key -- i.e.
 * {@link CandidateURI#getClassKey()} -- matches the supplied regexp.
 *
 * @author gojomo
 */
41 public class ClassKeyMatchesRegExpDecideRule extends MatchesRegExpDecideRule {
42
43 private static final long serialVersionUID = 1178873944436973294L;
44
45 private static final Logger logger =
46 Logger.getLogger(ClassKeyMatchesRegExpDecideRule.class.getName());
47
48 /***
49 * Usual constructor.
50 * @param name
51 */
52 public ClassKeyMatchesRegExpDecideRule(String name) {
53 super(name);
54 setDescription("ClassKeyMatchesRegExpDecideRule. " +
55 "Applies the configured " +
56 "decision to class keys matching the supplied " +
57 "regular expression. Class keys are values set into " +
58 "an URL by the Frontier. They are usually the names " +
59 "of queues used by the Frontier. Class keys can " +
60 "look like hostname + port or be plain IPs (It will " +
61 "depend on the Frontier implementation/configuration).");
62 }
63
64 /***
65 * Evaluate passed object.
66 * Test first that its CandidateURI. If so, does it have a class key.
67 * If not, ask frontier for its classkey. Then test against regex.
68 *
69 * @param object
70 * @return true if regexp is matched
71 */
72 protected boolean evaluate(Object object) {
73 try {
74 CandidateURI cauri = (CandidateURI)object;
75 String classKey = cauri.getClassKey();
76 if (classKey == null || classKey.length() <= 0) {
77 classKey = getSettingsHandler().getOrder().getController().
78 getFrontier().getClassKey(cauri);
79 cauri.setClassKey(classKey);
80 }
81 String regexp = getRegexp(cauri);
82 boolean result = (regexp == null)?
83 false: TextUtils.matches(regexp, cauri.getClassKey());
84 if (logger.isLoggable(Level.FINE)) {
85 logger.fine("Tested '" + cauri.getClassKey() +
86 "' match with regex '" + regexp + " and result was " +
87 result);
88 }
89 return result;
90 } catch (ClassCastException e) {
91
92 return false;
93 }
94 }
95 }