1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27 import java.util.Iterator;
28 import java.util.logging.Level;
29 import java.util.logging.Logger;
30
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.framework.Filter;
33 import org.archive.crawler.settings.MapType;
34 import org.archive.crawler.settings.SimpleType;
35
36
37 /***
38 * FilterDecideRule wraps a legacy Filter for use in DecideRule
39 * contexts.
40 *
41 * @author gojomo
42 */
43 public class FilterDecideRule extends DecideRule {
44 private static final long serialVersionUID = -3193099932171335572L;
45 private static Logger logger =
46 Logger.getLogger(FilterDecideRule.class.getName());
47
48 /***
49 * Filter(s) to apply. Package protections for testing.
50 */
51 MapType filters = null;
52 /*** Filters setting */
53 public final static String ATTR_FILTERS = "filters";
54
55 public final static String ATTR_TRUE_DECISION = "true-decision";
56 public final static String ATTR_FALSE_DECISION = "false-decision";
57 public final static String[] ALLOWED_TYPES =
58 new String[] {ACCEPT, PASS, REJECT};
59
60 /***
61 * Constructor.
62 * @param name Name of this rule.
63 */
64 public FilterDecideRule(String name) {
65 super(name);
66 setDescription("FilterDecideRule wraps legacy Filters, allowing " +
67 "them to be used in places expecting DecideRules.");
68
69 this.filters = (MapType) addElementToDefinition(
70 new MapType(ATTR_FILTERS, "Filters considered to determine " +
71 "decision. If any filter returns FALSE, the configured " +
72 "false-decision (usually REJECT) is applied. If no filter " +
73 "returns false, the configured true-decision (usually " +
74 "ACCEPT) is applied.", Filter.class));
75 addElementToDefinition(new SimpleType(ATTR_TRUE_DECISION,
76 "Decision applied if filters all return true. ",
77 ACCEPT, ALLOWED_TYPES));
78 addElementToDefinition(new SimpleType(ATTR_FALSE_DECISION,
79 "Decision applied if any filter returns false. ",
80 REJECT, ALLOWED_TYPES));
81 }
82
83 /***
84 * Make decision on passed <code>object</code>.
85 * @param object Object to rule on.
86 * @return {@link #ACCEPT}, {@link #REJECT}, or {@link #PASS}.
87 */
88 public Object decisionFor(Object object) {
89 if(! (object instanceof CrawlURI)) {
90 return PASS;
91 }
92 if (filtersAccept((CrawlURI) object)) {
93 return ((String) getUncheckedAttribute(object, ATTR_TRUE_DECISION))
94 .intern();
95 } else {
96 return ((String) getUncheckedAttribute(object, ATTR_FALSE_DECISION))
97 .intern();
98 }
99 }
100
101 /***
102 * Do all specified filters (if any) accept this CrawlURI?
103 *
104 * @param curi
105 * @return True if all filters accept this CrawlURI.
106 */
107 protected boolean filtersAccept(CrawlURI curi) {
108 return filtersAccept(this.filters, curi);
109 }
110
111 /***
112 * Do all specified filters (if any) accept this CrawlURI?
113 *
114 * @param curi
115 * @param fs Filters to process.
116 * @return True if all filters accept this CrawlURI.
117 */
118 protected boolean filtersAccept(MapType fs, CrawlURI curi) {
119 if (fs.isEmpty(curi)) {
120 return true;
121 }
122 for (Iterator i = fs.iterator(curi); i.hasNext();) {
123 Filter filter = (Filter)i.next();
124 if (!filter.accepts(curi)) {
125 if (logger.isLoggable(Level.INFO)) {
126 logger.info(filter + " rejected " + curi +
127 " in Processor " + getName());
128 }
129 return false;
130 }
131 }
132 return true;
133 }
134 }