1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.deciderules;
24
25 import org.archive.crawler.settings.SimpleType;
26
27 public class ExceedsDocumentLengthTresholdDecideRule extends
28 NotExceedsDocumentLengthTresholdDecideRule {
29
30 private static final long serialVersionUID = -3008503096295212224L;
31
32 /***
33 * Usual constructor.
34 * @param name Name of this rule.
35 */
36 public ExceedsDocumentLengthTresholdDecideRule(String name) {
37 super(name);
38 setDescription("ExceedsDocumentLengthTresholdDecideRule. ACCEPTs URIs "+
39 "with content length exceeding a given treshold. "+
40 "Either examines HTTP header content length or " +
41 "actual downloaded content length and returns false " +
42 "for documents exceeding a given length treshold.");
43
44 addElementToDefinition(new SimpleType(ATTR_CONTENT_LENGTH_TRESHOLD,
45 "Min " +
46 "content-length this filter will allow to pass through. If -1, " +
47 "then no limit.", DEFAULT_CONTENT_LENGTH_TRESHOLD)); }
48
49 /***
50 * @param contentLength content length to check against treshold
51 * @param obj Context object.
52 * @return contentLength exceeding treshold?
53 */
54 protected Boolean makeDecision(int contentLength, Object obj) {
55 return contentLength > getContentLengthTreshold(obj);
56 }
57
58 /***
59 * @param obj Context object.
60 * @return content length threshold
61 */
62 protected int getContentLengthTreshold(Object obj) {
63 int len = ((Integer)getUncheckedAttribute(obj,
64 ATTR_CONTENT_LENGTH_TRESHOLD)).intValue();
65 return len == -1? 0: len;
66 }
67 }