View Javadoc

1   /* $Id: ExceedsDocumentLengthTresholdDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
2    * 
3    * Created on 28.8.2006
4    *
5    * Copyright (C) 2006 Olaf Freyer
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.deciderules;
24  
25  import org.archive.crawler.settings.SimpleType;
26  
27  public class ExceedsDocumentLengthTresholdDecideRule extends
28  NotExceedsDocumentLengthTresholdDecideRule {
29  
30      private static final long serialVersionUID = -3008503096295212224L;
31  
32      /***
33       * Usual constructor. 
34       * @param name Name of this rule.
35       */
36      public ExceedsDocumentLengthTresholdDecideRule(String name) {
37      	super(name);
38      	setDescription("ExceedsDocumentLengthTresholdDecideRule. ACCEPTs URIs "+
39               "with content length exceeding a given treshold. "+
40               "Either examines HTTP header content length or " +
41               "actual downloaded content length and returns false " +
42               "for documents exceeding a given length treshold.");
43  
44          addElementToDefinition(new SimpleType(ATTR_CONTENT_LENGTH_TRESHOLD,
45          	"Min " +
46      	    "content-length this filter will allow to pass through. If -1, " +
47      	    "then no limit.", DEFAULT_CONTENT_LENGTH_TRESHOLD));    }
48      
49      /***
50       * @param contentLength content length to check against treshold
51       * @param obj Context object.
52       * @return contentLength exceeding treshold?
53       */
54      protected Boolean makeDecision(int contentLength, Object obj) {
55      	return contentLength > getContentLengthTreshold(obj);
56      }
57      
58      /***
59       * @param obj Context object.
60       * @return content length threshold
61       */
62      protected int getContentLengthTreshold(Object obj) {
63          int len = ((Integer)getUncheckedAttribute(obj,
64          		ATTR_CONTENT_LENGTH_TRESHOLD)).intValue();
65          return len == -1? 0: len;
66      }
67  }