View Javadoc

1   /* FilterDecideRule
2   *
3   * $Id: DecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
4   *
5   * Created on Mar 15, 2007
6   *
7   * Copyright (C) 2007 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.crawler.deciderules;
26  
27  import java.util.Iterator;
28  import java.util.logging.Level;
29  import java.util.logging.Logger;
30  
31  import org.archive.crawler.datamodel.CrawlURI;
32  import org.archive.crawler.framework.Filter;
33  import org.archive.crawler.settings.MapType;
34  import org.archive.crawler.settings.SimpleType;
35  
36  
37  /***
38   * FilterDecideRule wraps a legacy Filter for use in DecideRule 
39   * contexts. 
40   *
41   * @author gojomo
42   */
43  public class FilterDecideRule extends DecideRule {
44  	private static final long serialVersionUID = -3193099932171335572L;
45  	private static Logger logger =
46          Logger.getLogger(FilterDecideRule.class.getName());
47  
48      /***
49       * Filter(s) to apply. Package protections for testing.
50       */
51      MapType filters = null;
52      /*** Filters setting */
53      public final static String ATTR_FILTERS = "filters";
54      
55      public final static String ATTR_TRUE_DECISION = "true-decision";
56      public final static String ATTR_FALSE_DECISION = "false-decision";
57      public final static String[] ALLOWED_TYPES = 
58      	new String[] {ACCEPT, PASS, REJECT};
59  
60      /***
61       * Constructor.
62       * @param name Name of this rule.
63       */
64      public FilterDecideRule(String name) {
65          super(name);
66          setDescription("FilterDecideRule wraps legacy Filters, allowing " +
67          		"them to be used in places expecting DecideRules.");
68  
69          this.filters = (MapType) addElementToDefinition(
70              new MapType(ATTR_FILTERS, "Filters considered to determine " +
71                  "decision.  If any filter returns FALSE, the configured " +
72                  "false-decision (usually REJECT) is applied. If no filter " +
73                  "returns false, the configured true-decision (usually " +
74                  "ACCEPT) is applied.", Filter.class));
75          addElementToDefinition(new SimpleType(ATTR_TRUE_DECISION,
76                  "Decision applied if filters all return true. ", 
77                  ACCEPT, ALLOWED_TYPES));
78          addElementToDefinition(new SimpleType(ATTR_FALSE_DECISION,
79                  "Decision applied if any filter returns false. ", 
80                  REJECT, ALLOWED_TYPES));
81      }
82  
83      /***
84       * Make decision on passed <code>object</code>.
85       * @param object Object to rule on.
86       * @return {@link #ACCEPT}, {@link #REJECT}, or {@link #PASS}.
87       */
88      public Object decisionFor(Object object) {
89      	if(! (object instanceof CrawlURI)) {
90      		return PASS;
91      	}
92          if (filtersAccept((CrawlURI) object)) {
93  			return ((String) getUncheckedAttribute(object, ATTR_TRUE_DECISION))
94  					.intern();
95  		} else {
96  			return ((String) getUncheckedAttribute(object, ATTR_FALSE_DECISION))
97  					.intern();
98  		}
99      }
100 
101 	/***
102      * Do all specified filters (if any) accept this CrawlURI?
103      *
104      * @param curi
105      * @return True if all filters accept this CrawlURI.
106      */
107     protected boolean filtersAccept(CrawlURI curi) {
108         return filtersAccept(this.filters, curi);
109     }
110     
111     /***
112      * Do all specified filters (if any) accept this CrawlURI?
113      *
114      * @param curi
115      * @param fs Filters to process.
116      * @return True if all filters accept this CrawlURI.
117      */
118     protected boolean filtersAccept(MapType fs, CrawlURI curi) {
119         if (fs.isEmpty(curi)) {
120             return true;
121         }
122         for (Iterator i = fs.iterator(curi); i.hasNext();) {
123             Filter filter = (Filter)i.next();
124             if (!filter.accepts(curi)) {
125                 if (logger.isLoggable(Level.INFO)) {
126                     logger.info(filter + " rejected " + curi +
127                         " in Processor " + getName());
128                 }
129                 return false;
130             }
131         }
132         return true;
133     }
134 }