1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.archive.crawler.deciderules;
22
23 import org.archive.crawler.datamodel.CrawlURI;
24 import org.archive.util.TextUtils;
25
26 /***
27 * DecideRule whose decision is applied if the URI's content-type
28 * is present and matches the supplied regular expression.
29 *
30 * @author Olaf Freyer
31 */
32 public class ContentTypeMatchesRegExpDecideRule extends MatchesRegExpDecideRule{
33 private static final long serialVersionUID = -2066930281015155843L;
34
35 public ContentTypeMatchesRegExpDecideRule(String name) {
36 super(name);
37 setDescription("ContentTypeMatchesRegExpDecideRule. Applies the " +
38 "configured decision to URIs matching the supplied regular " +
39 "expression. Cannot be used until after fetcher processors. " +
40 "Only then is the Content-Type known. A good place for this " +
41 "rule is at the writer step processing. If the content-type " +
42 "is null, 301s usually have no content-type, this deciderule " +
43 "will PASS.");
44 }
45
46 @Override
47 protected boolean evaluate(Object o) {
48 if (!(o instanceof CrawlURI)) {
49 return false;
50 }
51 String content_type = ((CrawlURI)o).getContentType();
52 String regexp = getRegexp(o);
53 return (regexp == null || content_type == null)? false:
54 TextUtils.matches(getRegexp(o), content_type);
55 }
56 }