1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 package org.archive.crawler.deciderules;
23
24 import org.archive.crawler.datamodel.CrawlURI;
25
26 /***
27 * DecideRule whose decision is applied if the URI's content-type
28 * is present and does not match the supplied regular expression.
29 *
30 * @author Olaf Freyer
31 */
32 public class ContentTypeNotMatchesRegExpDecideRule extends
33 ContentTypeMatchesRegExpDecideRule {
34 private static final long serialVersionUID = 4729800377757426137L;
35
36 public ContentTypeNotMatchesRegExpDecideRule(String name) {
37 super(name);
38 setDescription("ContentTypeNotMatchesRegExpDecideRule. Applies the " +
39 "configured decision to URIs not matching the supplied regular " +
40 "expression. Cannot be used until after fetcher processors. " +
41 "Only then is the Content-Type known. A good place for this " +
42 "rule is at the writer step processing. If the content-type " +
43 "is null, 301s usually have no content-type, this deciderule " +
44 "will PASS.");
45 }
46
47 /***
48 * Evaluate whether given object's string version does not match
49 * configured regexp (by reversing the superclass's answer).
50 *
51 * @param object Object to make decision about.
52 * @return true if the regexp is not matched
53 */
54 protected boolean evaluate(Object o) {
55 if (!(o instanceof CrawlURI)) {
56 return false;
57 }
58 String content_type = ((CrawlURI)o).getContentType();
59 String regexp = getRegexp(o);
60 return (regexp == null || content_type == null)? false:
61 ! super.evaluate(o);
62 }
63
64 }