1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.settings;
25
26 import java.io.File;
27 import java.io.IOException;
28 import java.text.ParseException;
29
30 import javax.management.Attribute;
31 import javax.management.AttributeNotFoundException;
32 import javax.management.InvalidAttributeValueException;
33 import javax.management.MBeanException;
34 import javax.management.ReflectionException;
35
36 import org.apache.commons.httpclient.URIException;
37 import org.archive.crawler.datamodel.CrawlOrder;
38 import org.archive.crawler.datamodel.CrawlURI;
39 import org.archive.crawler.framework.CrawlScope;
40 import org.archive.crawler.scope.ClassicScope;
41 import org.archive.crawler.settings.refinements.Criteria;
42 import org.archive.crawler.settings.refinements.PortnumberCriteria;
43 import org.archive.crawler.settings.refinements.Refinement;
44 import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
45 import org.archive.crawler.settings.refinements.TimespanCriteria;
46 import org.archive.net.UURIFactory;
47
/**
 * Tests the handling of settings files.
 *
 * @author John Erik Halse
 */
54 public class XMLSettingsHandlerTest extends SettingsFrameworkTestCase {
55
56
57
58
59 protected void setUp() throws Exception {
60 super.setUp();
61 }
62
63
64
65
66 protected void tearDown() throws Exception {
67 super.tearDown();
68 }
69
70
71
72
73 public void testWriteSettingsObjectCrawlerSettings()
74 throws AttributeNotFoundException, InvalidAttributeValueException,
75 MBeanException, ReflectionException {
76
77
78 CrawlerSettings settings = getGlobalSettings();
79 XMLSettingsHandler handler = getSettingsHandler();
80 handler.registerValueErrorHandler(this);
81 handler.getOrder().setAttribute(new ClassicScope());
82 handler.writeSettingsObject(settings);
83 assertTrue("Order file was not written", getOrderFile().exists());
84
85
86 ComplexType scope = settings.getModule(CrawlScope.ATTR_NAME);
87 assertNotNull("Could not get module scope", scope);
88
89
90 CrawlerSettings perHost = getPerHostSettings();
91 Integer newHops = new Integer(500);
92 String newFrom = "newfrom";
93 scope.setAttribute(perHost, new Attribute(
94 ClassicScope.ATTR_MAX_LINK_HOPS, newHops));
95 CrawlOrder order = handler.getOrder();
96 ComplexType httpHeaders = (ComplexType) order
97 .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
98 httpHeaders.setAttribute(perHost, new Attribute(CrawlOrder.ATTR_FROM,
99 newFrom));
100
101
102 handler.writeSettingsObject(perHost);
103 assertTrue("Per host file was not written", handler.settingsToFilename(
104 perHost).exists());
105
106
107 XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
108 newHandler.initialize();
109
110
111 CrawlerSettings newPerHost = newHandler.getSettingsObject(perHost
112 .getScope());
113 assertNotNull("Per host scope could not be read", newPerHost);
114
115 ComplexType newScope = newHandler.getModule(CrawlScope.ATTR_NAME);
116 assertNotNull(newScope);
117 Integer r1 = (Integer) newScope.getAttribute(newPerHost,
118 ClassicScope.ATTR_MAX_LINK_HOPS);
119 assertEquals(newHops, r1);
120
121 ComplexType newHttpHeaders = (ComplexType) newHandler.getOrder()
122 .getAttribute(newPerHost, CrawlOrder.ATTR_HTTP_HEADERS);
123 assertNotNull(newHttpHeaders);
124
125 String r2 = (String) newHttpHeaders.getAttribute(newPerHost,
126 CrawlOrder.ATTR_FROM);
127 assertEquals(newFrom, r2);
128 }
129
130 /***
131 * Test the copying of the entire settings directory.
132 *
133 * @throws IOException
134 */
135 public void testCopySettings() throws IOException {
136
137
138
139 XMLSettingsHandler handler = getSettingsHandler();
140 handler.writeSettingsObject(getGlobalSettings());
141 handler.writeSettingsObject(getPerHostSettings());
142
143
144 File newOrderFile = new File(getTmpDir(), "SETTINGS_new_order.xml");
145 String newSettingsDir = "SETTINGS_new_per_host_settings";
146 handler.copySettings(newOrderFile, newSettingsDir);
147
148
149 assertTrue("Order file was not written", newOrderFile.exists());
150
151 assertTrue("New settings dir not set", handler.settingsToFilename(
152 getPerHostSettings()).getAbsolutePath().matches(
153 ".*" + newSettingsDir + ".*"));
154 assertTrue("Per host file was not written", handler.settingsToFilename(
155 getPerHostSettings()).exists());
156 }
157
158 public void testGetSettings() {
159 XMLSettingsHandler handler = getSettingsHandler();
160 CrawlerSettings order = handler.getSettingsObject(null);
161 CrawlerSettings perHost = handler.getSettings("localhost.localdomain");
162 assertNotNull("Didn't get any file", perHost);
163 assertSame("Did not get same file", order, perHost);
164 }
165
166 public void testGetSettingsObject() {
167 String testScope = "audio.archive.org";
168
169 XMLSettingsHandler handler = getSettingsHandler();
170 assertNotNull("Couldn't get orderfile", handler.getSettingsObject(null));
171 assertNull("Got nonexisting per host file", handler
172 .getSettingsObject(testScope));
173 assertNotNull("Couldn't create per host file", handler
174 .getOrCreateSettingsObject(testScope));
175 assertNotNull("Couldn't get per host file", handler
176 .getSettingsObject(testScope));
177 }
178
179 public void testDeleteSettingsObject() {
180 XMLSettingsHandler handler = getSettingsHandler();
181 File file = handler.settingsToFilename(getPerHostSettings());
182 handler.writeSettingsObject(getPerHostSettings());
183 assertTrue("Per host file was not written", file.exists());
184 handler.deleteSettingsObject(getPerHostSettings());
185 assertFalse("Per host file was not deleted", file.exists());
186 }
187
188 public void testReadWriteRefinements() throws ParseException,
189 InvalidAttributeValueException, AttributeNotFoundException,
190 MBeanException, ReflectionException, URIException {
191 XMLSettingsHandler handler = getSettingsHandler();
192 CrawlerSettings global = getGlobalSettings();
193 CrawlerSettings per = getPerHostSettings();
194 ComplexType headers = (ComplexType) handler.getOrder().getAttribute(
195 CrawlOrder.ATTR_HTTP_HEADERS);
196
197 String globalFrom = (String) headers.getAttribute(CrawlOrder.ATTR_FROM);
198 String refinedGlobalFrom = "refined@global.address";
199 String refinedPerFrom = "refined@per.address";
200
201
202 Refinement globalRefinement = new Refinement(global, "test",
203 "Refinement test");
204 Criteria timespanCriteria = new TimespanCriteria("2300", "2300");
205 globalRefinement.addCriteria(timespanCriteria);
206 Criteria regexpCriteria = new RegularExpressionCriteria(".*www.*");
207 globalRefinement.addCriteria(regexpCriteria);
208 handler.writeSettingsObject(global);
209
210
211 CrawlerSettings globalRefinementSetting = globalRefinement
212 .getSettings();
213 headers.setAttribute(globalRefinementSetting, new Attribute(
214 CrawlOrder.ATTR_FROM, refinedGlobalFrom));
215 handler.writeSettingsObject(globalRefinementSetting);
216
217
218 Refinement perRefinement = new Refinement(per, "test2",
219 "Refinement test2");
220 Criteria portCriteria = new PortnumberCriteria("10");
221 perRefinement.addCriteria(portCriteria);
222 handler.writeSettingsObject(per);
223
224
225 CrawlerSettings perRefinementSetting = perRefinement.getSettings();
226 headers.setAttribute(perRefinementSetting, new Attribute(
227 CrawlOrder.ATTR_FROM, refinedPerFrom));
228 handler.writeSettingsObject(perRefinementSetting);
229
230
231 XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
232 newHandler.initialize();
233 CrawlerSettings newGlobal = newHandler.getSettingsObject(null);
234 assertNotNull("Global scope could not be read", newGlobal);
235 CrawlerSettings newPer = newHandler.getSettingsObject(per.getScope());
236 assertNotNull("Per host scope could not be read", newPer);
237
238 ComplexType newHeaders = (ComplexType) newHandler.getOrder()
239 .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
240 assertNotNull(newHeaders);
241
242 String newFrom1 = (String) newHeaders.getAttribute(
243 CrawlOrder.ATTR_FROM, getMatchDomainURI());
244 String newFrom2 = (String) newHeaders.getAttribute(
245 CrawlOrder.ATTR_FROM, getMatchHostURI());
246 CrawlURI matchHostAndPortURI = new CrawlURI(
247 UURIFactory.getInstance("http://www.archive.org:10/index.html"));
248 String newFrom3 = (String) newHeaders.getAttribute(
249 CrawlOrder.ATTR_FROM, matchHostAndPortURI);
250
251
252 assertEquals(globalFrom, newFrom1);
253 assertEquals(refinedGlobalFrom, newFrom2);
254 assertEquals(refinedPerFrom, newFrom3);
255 }
256
257 public void testToResourcePath() {
258 assertTrue(
259 XMLSettingsHandler.toResourcePath(new File("/usr/local/bin"))
260 .startsWith("/usr/local/bin"));
261 assertTrue(
262 XMLSettingsHandler.toResourcePath(new File("/home/user1/Test.java"))
263 .startsWith("/home/user1/Test.java"));
264 if(File.separatorChar=='//') {
265
266 assertTrue(
267 XMLSettingsHandler.toResourcePath(new File("C://Windows//System32"))
268 .startsWith("/Windows/System32"));
269 assertTrue(
270 XMLSettingsHandler.toResourcePath(new File("Z://some.dir//another.dir//some.file.ext"))
271 .startsWith("/some.dir/another.dir/some.file.ext"));
272 }
273 }
274 }