View Javadoc

1   /* BeanShellDecideRule
2   *
3   * $Id: BeanShellDecideRule.java 6149 2009-03-02 22:52:51Z gojomo $
4   *
5   * Created on Aug 7, 2006
6   *
7   * Copyright (C) 2006 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.crawler.deciderules;
26  
27  import java.io.File;
28  import java.io.IOException;
29  import java.util.Collections;
30  import java.util.HashMap;
31  import java.util.Map;
32  import java.util.logging.Level;
33  import java.util.logging.Logger;
34  
35  import org.archive.crawler.settings.SimpleType;
36  import org.archive.crawler.settings.Type;
37  
38  import bsh.EvalError;
39  import bsh.Interpreter;
40  
41  
42  /***
43   * Rule which runs a groovy script to make its decision. 
44   * 
45   * Script source may be provided via a file local to the crawler.
46   * 
47   * Variables available to the script include 'object' (the object to be
48   * evaluated, typically a CandidateURI or CrawlURI), 'self' 
49   * (this GroovyDecideRule instance), and 'controller' (the crawl's 
50   * CrawlController instance). 
51   *
52   * TODO: reduce copy & paste with GroovyProcessor
53   * 
54   * @author gojomo
55   */
56  public class BeanShellDecideRule extends DecideRule {
57  
58      private static final long serialVersionUID = -8433859929199308527L;
59  
60      private static final Logger logger =
61          Logger.getLogger(BeanShellDecideRule.class.getName());
62      
63      /*** setting for script file */
64      public final static String ATTR_SCRIPT_FILE = "script-file"; 
65  
66      /*** whether each thread should have its own script runner (true), or
67       * they should share a single script runner with synchronized access */
68      public final static String ATTR_ISOLATE_THREADS = "isolate-threads";
69  
70      protected ThreadLocal<Interpreter> threadInterpreter = 
71          new ThreadLocal<Interpreter>();;
72      protected Interpreter sharedInterpreter;
73      public Map<Object,Object> sharedMap = 
74          Collections.synchronizedMap(new HashMap<Object,Object>());
75      protected boolean initialized = false; 
76      
77      public BeanShellDecideRule(String name) {
78          super(name);
79          setDescription("BeanShellDecideRule. Runs the BeanShell script " +
80                  "source (supplied via a file path) against " +
81                  "the current URI. Source should define a script method " +
82                  "'decisionFor(object)' which will be passed the object" +
83                  "to be evaluated and returns one of self.ACCEPT, " +
84                  "self.REJECT, or self.PASS. " +
85                  "The script may access this BeanShellDecideRule via" +
86                  "the 'self' variable and the CrawlController via the " +
87                  "'controller' variable. Runs the groovy script source " +
88                  "(supplied via a file path) against the " +
89                  "current URI.");
90          Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE,
91                  "BeanShell script file", ""));
92          t.setOverrideable(false);
93          t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS,
94                  "Whether each ToeThread should get its own independent " +
95                  "script context, or they should share synchronized access " +
96                  "to one context. Default is true, meaning each threads " +
97                  "gets its own isolated context.", true));
98          t.setOverrideable(false);
99      }
100 
101     public Object decisionFor(Object object) {
102         // depending on previous configuration, interpreter may 
103         // be local to this thread or shared
104         Interpreter interpreter = getInterpreter(); 
105         synchronized(interpreter) {
106             // synchronization is harmless for local thread interpreter,
107             // necessary for shared interpreter
108             try {
109                 interpreter.set("object",object);
110                 return interpreter.eval("decisionFor(object)");
111             } catch (EvalError e) {
112                 // TODO Auto-generated catch block
113                 e.printStackTrace();
114                 return PASS;
115             } 
116         }
117     }
118 
119     /***
120      * Get the proper Interpreter instance -- either shared or local 
121      * to this thread. 
122      * @return Interpreter to use
123      */
124     protected synchronized Interpreter getInterpreter() {
125         if(sharedInterpreter==null 
126            && !(Boolean)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
127             // initialize
128             sharedInterpreter = newInterpreter();
129         }
130         if(sharedInterpreter!=null) {
131             return sharedInterpreter;
132         }
133         Interpreter interpreter = threadInterpreter.get(); 
134         if(interpreter==null) {
135             interpreter = newInterpreter(); 
136             threadInterpreter.set(interpreter);
137         }
138         return interpreter; 
139     }
140 
141     /***
142      * Create a new Interpreter instance, preloaded with any supplied
143      * source file and the variables 'self' (this 
144      * BeanShellProcessor) and 'controller' (the CrawlController). 
145      * 
146      * @return  the new Interpreter instance
147      */
148     protected Interpreter newInterpreter() {
149         Interpreter interpreter = new Interpreter(); 
150         try {
151             interpreter.set("self", this);
152             interpreter.set("controller", getController());
153             
154             String filePath = (String) getUncheckedAttribute(null, ATTR_SCRIPT_FILE);
155             if(filePath.length()>0) {
156                 try {
157                     File file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath);
158                     interpreter.source(file.getPath());
159                 } catch (IOException e) {
160                     logger.log(Level.SEVERE,"unable to read script file",e);
161                 }
162             }
163         } catch (EvalError e) {
164             // TODO Auto-generated catch block
165             e.printStackTrace();
166         }
167         
168         return interpreter; 
169     }
170     
171     
172     /***
173      * Setup (or reset) Intepreter variables, as appropraite based on 
174      * thread-isolation setting. 
175      */
176     public void kickUpdate() {
177         // TODO make it so running state (tallies, etc.) isn't lost on changes
178         // unless unavoidable
179         if((Boolean)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
180             sharedInterpreter = null; 
181             threadInterpreter = new ThreadLocal<Interpreter>(); 
182         } else {
183             sharedInterpreter = newInterpreter(); 
184             threadInterpreter = null;
185         }
186     }
187 }