1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27 import java.io.File;
28 import java.io.IOException;
29 import java.util.Collections;
30 import java.util.HashMap;
31 import java.util.Map;
32 import java.util.logging.Level;
33 import java.util.logging.Logger;
34
35 import org.archive.crawler.settings.SimpleType;
36 import org.archive.crawler.settings.Type;
37
38 import bsh.EvalError;
39 import bsh.Interpreter;
40
41
/**
 * Rule which runs a BeanShell script to make its decision.
 *
 * Script source may be provided via a file local to the crawler.
 *
 * Variables available to the script include 'object' (the object to be
 * evaluated, typically a CandidateURI or CrawlURI), 'self'
 * (this BeanShellDecideRule instance), and 'controller' (the crawl's
 * CrawlController instance).
 *
 * TODO: reduce copy & paste with BeanShellProcessor
 *
 * @author gojomo
 */
56 public class BeanShellDecideRule extends DecideRule {
57
58 private static final long serialVersionUID = -8433859929199308527L;
59
60 private static final Logger logger =
61 Logger.getLogger(BeanShellDecideRule.class.getName());
62
63 /*** setting for script file */
64 public final static String ATTR_SCRIPT_FILE = "script-file";
65
66 /*** whether each thread should have its own script runner (true), or
67 * they should share a single script runner with synchronized access */
68 public final static String ATTR_ISOLATE_THREADS = "isolate-threads";
69
70 protected ThreadLocal<Interpreter> threadInterpreter =
71 new ThreadLocal<Interpreter>();;
72 protected Interpreter sharedInterpreter;
73 public Map<Object,Object> sharedMap =
74 Collections.synchronizedMap(new HashMap<Object,Object>());
75 protected boolean initialized = false;
76
77 public BeanShellDecideRule(String name) {
78 super(name);
79 setDescription("BeanShellDecideRule. Runs the BeanShell script " +
80 "source (supplied via a file path) against " +
81 "the current URI. Source should define a script method " +
82 "'decisionFor(object)' which will be passed the object" +
83 "to be evaluated and returns one of self.ACCEPT, " +
84 "self.REJECT, or self.PASS. " +
85 "The script may access this BeanShellDecideRule via" +
86 "the 'self' variable and the CrawlController via the " +
87 "'controller' variable. Runs the groovy script source " +
88 "(supplied via a file path) against the " +
89 "current URI.");
90 Type t = addElementToDefinition(new SimpleType(ATTR_SCRIPT_FILE,
91 "BeanShell script file", ""));
92 t.setOverrideable(false);
93 t = addElementToDefinition(new SimpleType(ATTR_ISOLATE_THREADS,
94 "Whether each ToeThread should get its own independent " +
95 "script context, or they should share synchronized access " +
96 "to one context. Default is true, meaning each threads " +
97 "gets its own isolated context.", true));
98 t.setOverrideable(false);
99 }
100
101 public Object decisionFor(Object object) {
102
103
104 Interpreter interpreter = getInterpreter();
105 synchronized(interpreter) {
106
107
108 try {
109 interpreter.set("object",object);
110 return interpreter.eval("decisionFor(object)");
111 } catch (EvalError e) {
112
113 e.printStackTrace();
114 return PASS;
115 }
116 }
117 }
118
119 /***
120 * Get the proper Interpreter instance -- either shared or local
121 * to this thread.
122 * @return Interpreter to use
123 */
124 protected synchronized Interpreter getInterpreter() {
125 if(sharedInterpreter==null
126 && !(Boolean)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
127
128 sharedInterpreter = newInterpreter();
129 }
130 if(sharedInterpreter!=null) {
131 return sharedInterpreter;
132 }
133 Interpreter interpreter = threadInterpreter.get();
134 if(interpreter==null) {
135 interpreter = newInterpreter();
136 threadInterpreter.set(interpreter);
137 }
138 return interpreter;
139 }
140
141 /***
142 * Create a new Interpreter instance, preloaded with any supplied
143 * source file and the variables 'self' (this
144 * BeanShellProcessor) and 'controller' (the CrawlController).
145 *
146 * @return the new Interpreter instance
147 */
148 protected Interpreter newInterpreter() {
149 Interpreter interpreter = new Interpreter();
150 try {
151 interpreter.set("self", this);
152 interpreter.set("controller", getController());
153
154 String filePath = (String) getUncheckedAttribute(null, ATTR_SCRIPT_FILE);
155 if(filePath.length()>0) {
156 try {
157 File file = getSettingsHandler().getPathRelativeToWorkingDirectory(filePath);
158 interpreter.source(file.getPath());
159 } catch (IOException e) {
160 logger.log(Level.SEVERE,"unable to read script file",e);
161 }
162 }
163 } catch (EvalError e) {
164
165 e.printStackTrace();
166 }
167
168 return interpreter;
169 }
170
171
172 /***
173 * Setup (or reset) Intepreter variables, as appropraite based on
174 * thread-isolation setting.
175 */
176 public void kickUpdate() {
177
178
179 if((Boolean)getUncheckedAttribute(null,ATTR_ISOLATE_THREADS)) {
180 sharedInterpreter = null;
181 threadInterpreter = new ThreadLocal<Interpreter>();
182 } else {
183 sharedInterpreter = newInterpreter();
184 threadInterpreter = null;
185 }
186 }
187 }