View Javadoc

1   /*
2    * XMLSettingsHandlerTest
3    *
4    * $Id: XMLSettingsHandlerTest.java 6326 2009-05-28 01:40:33Z gojomo $
5    *
6    * Created on Jan 28, 2004
7    *
8    * Copyright (C) 2004 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify it under the
13   * terms of the GNU Lesser Public License as published by the Free Software
14   * Foundation; either version 2.1 of the License, or any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY
17   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18   * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
19   *
20   * You should have received a copy of the GNU Lesser Public License along with
21   * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple
22   * Place, Suite 330, Boston, MA 02111-1307 USA
23   */
24  package org.archive.crawler.settings;
25  
26  import java.io.File;
27  import java.io.IOException;
28  import java.text.ParseException;
29  
30  import javax.management.Attribute;
31  import javax.management.AttributeNotFoundException;
32  import javax.management.InvalidAttributeValueException;
33  import javax.management.MBeanException;
34  import javax.management.ReflectionException;
35  
36  import org.apache.commons.httpclient.URIException;
37  import org.archive.crawler.datamodel.CrawlOrder;
38  import org.archive.crawler.datamodel.CrawlURI;
39  import org.archive.crawler.framework.CrawlScope;
40  import org.archive.crawler.scope.ClassicScope;
41  import org.archive.crawler.settings.refinements.Criteria;
42  import org.archive.crawler.settings.refinements.PortnumberCriteria;
43  import org.archive.crawler.settings.refinements.Refinement;
44  import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
45  import org.archive.crawler.settings.refinements.TimespanCriteria;
46  import org.archive.net.UURIFactory;
47  
48  /***
49   * Tests the handling of settings files.
50   *
51   * @author John Erik Halse
52   *
53   */
54  public class XMLSettingsHandlerTest extends SettingsFrameworkTestCase {
55  
56      /*
57       * @see TestCase#setUp()
58       */
59      protected void setUp() throws Exception {
60          super.setUp();
61      }
62  
63      /*
64       * @see TestCase#tearDown()
65       */
66      protected void tearDown() throws Exception {
67          super.tearDown();
68      }
69  
70      /*
71       * Test for void writeSettingsObject(CrawlerSettings)
72       */
73      public void testWriteSettingsObjectCrawlerSettings()
74              throws AttributeNotFoundException, InvalidAttributeValueException,
75              MBeanException, ReflectionException {
76  
77          // Write a crawl order file
78          CrawlerSettings settings = getGlobalSettings();
79          XMLSettingsHandler handler = getSettingsHandler();
80          handler.registerValueErrorHandler(this);
81          handler.getOrder().setAttribute(new ClassicScope());
82          handler.writeSettingsObject(settings);
83          assertTrue("Order file was not written", getOrderFile().exists());
84  
85          // Get a module to alter a setting on
86          ComplexType scope = settings.getModule(CrawlScope.ATTR_NAME);
87          assertNotNull("Could not get module scope", scope);
88  
89          // Alter two settings in a per host file
90          CrawlerSettings perHost = getPerHostSettings();
91          Integer newHops = new Integer(500);
92          String newFrom = "newfrom";
93          scope.setAttribute(perHost, new Attribute(
94              ClassicScope.ATTR_MAX_LINK_HOPS, newHops));
95          CrawlOrder order = handler.getOrder();
96          ComplexType httpHeaders = (ComplexType) order
97              .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
98          httpHeaders.setAttribute(perHost, new Attribute(CrawlOrder.ATTR_FROM,
99                  newFrom));
100 
101         // Write the per host file
102         handler.writeSettingsObject(perHost);
103         assertTrue("Per host file was not written", handler.settingsToFilename(
104                 perHost).exists());
105 
106         // Create a new handler for testing that changes was written to disk
107         XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
108         newHandler.initialize();
109 
110         // Read perHost
111         CrawlerSettings newPerHost = newHandler.getSettingsObject(perHost
112                 .getScope());
113         assertNotNull("Per host scope could not be read", newPerHost);
114 
115         ComplexType newScope = newHandler.getModule(CrawlScope.ATTR_NAME);
116         assertNotNull(newScope);
117         Integer r1 = (Integer) newScope.getAttribute(newPerHost,
118             ClassicScope.ATTR_MAX_LINK_HOPS);
119         assertEquals(newHops, r1);
120 
121         ComplexType newHttpHeaders = (ComplexType) newHandler.getOrder()
122                 .getAttribute(newPerHost, CrawlOrder.ATTR_HTTP_HEADERS);
123         assertNotNull(newHttpHeaders);
124 
125         String r2 = (String) newHttpHeaders.getAttribute(newPerHost,
126                 CrawlOrder.ATTR_FROM);
127         assertEquals(newFrom, r2);
128     }
129 
130     /***
131      * Test the copying of the entire settings directory.
132      *
133      * @throws IOException
134      */
135     public void testCopySettings() throws IOException {
136         //String testScope = "www.archive.org";
137 
138         // Write the files
139         XMLSettingsHandler handler = getSettingsHandler();
140         handler.writeSettingsObject(getGlobalSettings());
141         handler.writeSettingsObject(getPerHostSettings());
142 
143         // Copy to new location
144         File newOrderFile = new File(getTmpDir(), "SETTINGS_new_order.xml");
145         String newSettingsDir = "SETTINGS_new_per_host_settings";
146         handler.copySettings(newOrderFile, newSettingsDir);
147 
148         // Check if new files where created.
149         assertTrue("Order file was not written", newOrderFile.exists());
150 
151         assertTrue("New settings dir not set", handler.settingsToFilename(
152                 getPerHostSettings()).getAbsolutePath().matches(
153                 ".*" + newSettingsDir + ".*"));
154         assertTrue("Per host file was not written", handler.settingsToFilename(
155                 getPerHostSettings()).exists());
156     }
157 
158     public void testGetSettings() {
159         XMLSettingsHandler handler = getSettingsHandler();
160         CrawlerSettings order = handler.getSettingsObject(null);
161         CrawlerSettings perHost = handler.getSettings("localhost.localdomain");
162         assertNotNull("Didn't get any file", perHost);
163         assertSame("Did not get same file", order, perHost);
164     }
165 
166     public void testGetSettingsObject() {
167         String testScope = "audio.archive.org";
168 
169         XMLSettingsHandler handler = getSettingsHandler();
170         assertNotNull("Couldn't get orderfile", handler.getSettingsObject(null));
171         assertNull("Got nonexisting per host file", handler
172                 .getSettingsObject(testScope));
173         assertNotNull("Couldn't create per host file", handler
174                 .getOrCreateSettingsObject(testScope));
175         assertNotNull("Couldn't get per host file", handler
176                 .getSettingsObject(testScope));
177     }
178 
179     public void testDeleteSettingsObject() {
180         XMLSettingsHandler handler = getSettingsHandler();
181         File file = handler.settingsToFilename(getPerHostSettings());
182         handler.writeSettingsObject(getPerHostSettings());
183         assertTrue("Per host file was not written", file.exists());
184         handler.deleteSettingsObject(getPerHostSettings());
185         assertFalse("Per host file was not deleted", file.exists());
186     }
187 
188     public void testReadWriteRefinements() throws ParseException,
189             InvalidAttributeValueException, AttributeNotFoundException,
190             MBeanException, ReflectionException, URIException {
191         XMLSettingsHandler handler = getSettingsHandler();
192         CrawlerSettings global = getGlobalSettings();
193         CrawlerSettings per = getPerHostSettings();
194         ComplexType headers = (ComplexType) handler.getOrder().getAttribute(
195                 CrawlOrder.ATTR_HTTP_HEADERS);
196 
197         String globalFrom = (String) headers.getAttribute(CrawlOrder.ATTR_FROM);
198         String refinedGlobalFrom = "refined@global.address";
199         String refinedPerFrom = "refined@per.address";
200 
201         // Create a refinement on the global level
202         Refinement globalRefinement = new Refinement(global, "test",
203                 "Refinement test");
204         Criteria timespanCriteria = new TimespanCriteria("2300", "2300");
205         globalRefinement.addCriteria(timespanCriteria);
206         Criteria regexpCriteria = new RegularExpressionCriteria(".*www.*");
207         globalRefinement.addCriteria(regexpCriteria);
208         handler.writeSettingsObject(global);
209 
210         // Override an attribute on the global refinement
211         CrawlerSettings globalRefinementSetting = globalRefinement
212                 .getSettings();
213         headers.setAttribute(globalRefinementSetting, new Attribute(
214                 CrawlOrder.ATTR_FROM, refinedGlobalFrom));
215         handler.writeSettingsObject(globalRefinementSetting);
216 
217         // Create a refinement on a per level
218         Refinement perRefinement = new Refinement(per, "test2",
219                 "Refinement test2");
220         Criteria portCriteria = new PortnumberCriteria("10");
221         perRefinement.addCriteria(portCriteria);
222         handler.writeSettingsObject(per);
223 
224         // Override an attribute on the per refinement
225         CrawlerSettings perRefinementSetting = perRefinement.getSettings();
226         headers.setAttribute(perRefinementSetting, new Attribute(
227                 CrawlOrder.ATTR_FROM, refinedPerFrom));
228         handler.writeSettingsObject(perRefinementSetting);
229 
230         // Create a new handler for testing that changes was written to disk
231         XMLSettingsHandler newHandler = new XMLSettingsHandler(getOrderFile());
232         newHandler.initialize();
233         CrawlerSettings newGlobal = newHandler.getSettingsObject(null);
234         assertNotNull("Global scope could not be read", newGlobal);
235         CrawlerSettings newPer = newHandler.getSettingsObject(per.getScope());
236         assertNotNull("Per host scope could not be read", newPer);
237 
238         ComplexType newHeaders = (ComplexType) newHandler.getOrder()
239                 .getAttribute(CrawlOrder.ATTR_HTTP_HEADERS);
240         assertNotNull(newHeaders);
241 
242         String newFrom1 = (String) newHeaders.getAttribute(
243                 CrawlOrder.ATTR_FROM, getMatchDomainURI());
244         String newFrom2 = (String) newHeaders.getAttribute(
245                 CrawlOrder.ATTR_FROM, getMatchHostURI());
246         CrawlURI matchHostAndPortURI = new CrawlURI(
247             UURIFactory.getInstance("http://www.archive.org:10/index.html"));
248         String newFrom3 = (String) newHeaders.getAttribute(
249                 CrawlOrder.ATTR_FROM, matchHostAndPortURI);
250 
251         //Check that we got what we expected
252         assertEquals(globalFrom, newFrom1);
253         assertEquals(refinedGlobalFrom, newFrom2);
254         assertEquals(refinedPerFrom, newFrom3);
255     }
256     
257     public void testToResourcePath() {
258         assertTrue(
259             XMLSettingsHandler.toResourcePath(new File("/usr/local/bin"))
260             .startsWith("/usr/local/bin"));
261         assertTrue(
262             XMLSettingsHandler.toResourcePath(new File("/home/user1/Test.java"))
263             .startsWith("/home/user1/Test.java"));
264         if(File.separatorChar=='//') {
265             // run these only on relevant platform (Windows)
266             assertTrue(
267                 XMLSettingsHandler.toResourcePath(new File("C://Windows//System32"))
268                 .startsWith("/Windows/System32"));
269             assertTrue(
270                 XMLSettingsHandler.toResourcePath(new File("Z://some.dir//another.dir//some.file.ext"))
271                 .startsWith("/some.dir/another.dir/some.file.ext"));
272         }
273     }
274 }