View Javadoc

1   /* SelfTestCase
2    *
3    * Created on Feb 4, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.selftest;
24  
25  
26  import java.io.File;
27  import java.io.FileNotFoundException;
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.Iterator;
31  import java.util.List;
32  
33  import javax.management.AttributeNotFoundException;
34  import javax.management.MBeanException;
35  import javax.management.ReflectionException;
36  
37  import junit.framework.TestCase;
38  
39  import org.archive.crawler.admin.CrawlJob;
40  import org.archive.crawler.datamodel.CrawlOrder;
41  import org.archive.crawler.settings.ComplexType;
42  import org.archive.crawler.settings.StringList;
43  import org.archive.crawler.writer.ARCWriterProcessor;
44  import org.archive.io.arc.ARCReader;
45  import org.archive.io.arc.ARCReaderFactory;
46  import org.archive.io.arc.ARCRecordMetaData;
47  import org.archive.util.FileUtils;
48  
49  
50  /***
51   * Base class for integrated selftest unit tests.
52   *
53   * Has utility for integrated selftest such as location of selftest generated
54   * arc file.
55   *
56   * @author stack
57   * @version $Id: SelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
58   */
59  public abstract class SelfTestCase extends TestCase
60  {
61      /***
62       * Suffix for selftest classes.
63       */
64      protected static final String SELFTEST = "SelfTest";
65  
66      private static CrawlJob crawlJob = null;
67      private static File crawlJobDir = null;
68      private static File [] arcFile = null;
69      private static String selftestURL = null;
70  
71      /***
72       * Directory logs are kept in.
73       */
74      private static File logsDir = null;
75  
76      /***
77       * Has the static initializer for this class been run.
78       */
79      private static boolean initialized = false;
80  
81      /***
82       * The selftest webapp htdocs directory.
83       */
84      private static File htdocs = null;
85  
86  
87      /***
88       * A reference to an ARCReader on which the validate method has been called.
89       * Can be used to walk the metadata.
90       *
91       * @see org.archive.io.arc.ARCReader#validate()
92       */
93      private static ARCReader [] readReader = null;
94  
95      /***
96       * Metadata list from the arc reader.
97       *
98       * Gotten as byproduct of calling validate on the arcreader.
99       */
100 	private static List [] metaDatas;
101 
102 
103     public SelfTestCase()
104     {
105         super();
106     }
107 
108     public SelfTestCase(String testName)
109     {
110         super(testName);
111     }
112 
113     public void testNothing() {
114         // dummy test that always succeeds; prevents warning of no tests found
115         // when running 'all JUnit tests' in Heritrix project
116     }
117     
118     public void assertInitialized() {
119         assertTrue("SelfTestCase.initialize() not called " +
120                 "before running selftest.", initialized);
121     }
122 
123     /***
124      * Test non null and not empty.
125      *
126      * @param str String to test.
127      * @return The passed string.
128      * @throws IllegalArgumentException if null or empty string.
129      */
130     protected static void assertNonEmpty(String str) {
131         assertTrue("String "+str+" is empty",str.length()>0);
132     }
133 
134     /***
135      * Test nonull and exits.
136      *
137      * @param file File to test.
138      * @return Passed file.
139      * @throws FileNotFoundException passed file doesn't exist.
140      */
141     protected static void assertExists(File file) {
142         assertTrue("File "+file+" doesn't exist",file.exists());
143     }
144 
145     /***
146      * Static initializer.
147      *
148      * Must be called before instantiation of any tests based off this class.
149      *
150      * @param url URL to selftest webapp.
151      * @param job The selftest crawl job.
152      * @param jobDir Job output directory.  Has the seed file, the order file
153      * and logs.
154      * @param docs Expanded webapp directory location.
155      *
156      * @throws IOException if nonexistent directories passed.
157      */
158     public static synchronized void initialize(final String url,
159             final CrawlJob job, final File jobDir, final File docs)
160         throws IOException, AttributeNotFoundException, MBeanException,
161             ReflectionException, InterruptedException
162     {
163         assertNotNull(url);
164         assertNonEmpty(url);
165         SelfTestCase.selftestURL = url.endsWith("/")? url: url + "/";
166         
167         assertNotNull(job);
168         SelfTestCase.crawlJob = job;
169         
170         assertNotNull(jobDir);
171         assertExists(jobDir);
172         SelfTestCase.crawlJobDir = jobDir;
173         
174         assertNotNull(docs);
175         assertExists(docs);
176         SelfTestCase.htdocs = docs;
177         
178         // Calculate the logs directory.  If diskPath is not absolute, then logs
179         // are in the jobs directory under the diskPath subdirectory.  Guard
180         // against case where diskPath is empty.
181         CrawlOrder crawlOrder =job.getSettingsHandler().getOrder();
182         assertNotNull(crawlOrder);
183 
184         String diskPath = (String)crawlOrder.
185             getAttribute(null, CrawlOrder.ATTR_DISK_PATH);
186         if (diskPath != null && diskPath.length() > 0 &&
187             diskPath.startsWith(File.separator)) {
188             SelfTestCase.logsDir = new File(diskPath);
189         } else {
190             SelfTestCase.logsDir =
191                 (diskPath != null && diskPath.length() > 0)?
192                     new File(jobDir, diskPath): jobDir;
193         }
194         assertNotNull(SelfTestCase.logsDir);
195         assertExists(SelfTestCase.logsDir);
196         
197         // Calculate the arcfile name.  Find it in the arcDir.  Should only be
198         // one. Then make an instance of ARCReader and call the validate on it.
199         ComplexType arcWriterProcessor =
200             crawlOrder.getSettingsHandler().getModule("Archiver");
201         String arcDirStr = (String)((StringList)arcWriterProcessor.
202             getAttribute(ARCWriterProcessor.ATTR_PATH)).get(0);
203         File arcDir = null;
204         if (arcDirStr != null && arcDirStr.length() > 0 &&
205                 arcDirStr.startsWith(File.separator)) {
206             arcDir = new File(arcDirStr);
207         } else {
208             arcDir = (arcDirStr != null && arcDirStr.length() > 0)?
209                 new File(SelfTestCase.logsDir, arcDirStr): SelfTestCase.logsDir;
210         }
211         assertNotNull(arcDir);
212         assertExists(arcDir);
213         
214         String prefix = ((String)arcWriterProcessor.
215             getAttribute(ARCWriterProcessor.ATTR_PREFIX));
216         assertNotNull(prefix);
217         assertNonEmpty(prefix);
218         
219         File [] arcs = FileUtils.getFilesWithPrefix(arcDir, prefix);
220         /*
221         if (arcs.length != 1) {
222             throw new IOException("Expected one only arc file.  Found" +
223                 " instead " + Integer.toString(arcs.length) + " files.");
224         }
225         */
226         SelfTestCase.readReader = new ARCReader[arcs.length];
227         SelfTestCase.arcFile = new File[arcs.length];
228         SelfTestCase.metaDatas = new List[arcs.length];
229         for (int i = 0; i < arcs.length; i++) {
230         	File f = arcs[i];
231             SelfTestCase.arcFile[i] = f;
232             SelfTestCase.readReader[i] = ARCReaderFactory.get(f);
233             SelfTestCase.metaDatas[i] = SelfTestCase.readReader[i].validate();
234         }
235         SelfTestCase.initialized = true;
236     }
237 
238     /***
239      * @return Returns the arcDir.
240      */
241     protected static File [] getArcFiles() {
242         return arcFile;
243     }
244 
245     /***
246      * @return Returns the jobDir.
247      */
248     protected static File getCrawlJobDir()
249     {
250         return SelfTestCase.crawlJobDir;
251     }
252 
253     /***
254      * @return Return the directory w/ logs in it.
255      */
256     protected static File getLogsDir()
257     {
258         return SelfTestCase.logsDir;
259     }
260 
261     /***
262      * Returns the selftest read ARCReader.
263      *
264      * The returned ARCReader has been validated.  Use it to get at metadata.
265      *
266      * @return Returns the readReader, an ARCReader that has been validated.
267      */
268     protected static ARCReader [] getReadReaders() {
269         return SelfTestCase.readReader;
270     }
271 
272     /***
273      * @return Returns list of ARCReader metadatas, the byproduct of calling
274      * validate.
275      */
276     protected static List [] getMetaDatas() {
277         return SelfTestCase.metaDatas;
278     }
279 
280     /***
281      * @return Returns the selftestURL.
282      */
283     public static String getSelftestURL()
284     {
285         return SelfTestCase.selftestURL;
286     }
287 
288     /***
289      * @return Returns the selftestURL.  URL returned is guaranteed to have
290      * a trailing '/'.
291      */
292     public static String getSelftestURLWithTrailingSlash()
293     {
294         return selftestURL.endsWith("/")? selftestURL: selftestURL + "/";
295     }
296 
297     /***
298      * Calculates test name by stripping SelfTest from current class name.
299      *
300      * @return The name of the test.
301      */
302     public String getTestName()
303     {
304         String classname = getClass().getName();
305         int selftestIndex = classname.indexOf(SELFTEST);
306         assertTrue("Class name ends with SelfTest", selftestIndex > 0);
307         int lastDotIndex = classname.lastIndexOf('.');
308         assertTrue("Package dot in unexpected location",
309             lastDotIndex + 1 < classname.length() && lastDotIndex > 0);
310         return classname.substring(lastDotIndex + 1, selftestIndex);
311     }
312 
313     /***
314      * @return Returns the selftest webappDir.
315      */
316     public static File getHtdocs()
317     {
318         return SelfTestCase.htdocs;
319     }
320 
321     /***
322      * @return Returns the crawlJob.
323      */
324     public static CrawlJob getCrawlJob()
325     {
326         return crawlJob;
327     }
328 
329     /***
330      * Confirm passed files exist on disk under the test directory.
331      *
332      * @param files Files to test for existence under the test's directory.
333      * @return true if all files exist on disk.
334      */
335     public boolean filesExist(List files)
336     {
337         boolean result = true;
338         for (Iterator i = files.iterator(); i.hasNext();)
339         {
340             if (!fileExists((File)i.next()))
341             {
342                 result = false;
343                 break;
344             }
345         }
346         return result;
347     }
348 
349     /***
350      * Confirm passed file exists on disk under the test directory.
351      *
352      * This method takes care of building up the file path under the selftest
353      * webapp.  Just pass the file name.
354      *
355      * @param file Name of file to look for.
356      * @return True if file exists.
357      */
358     public boolean fileExists(File file)
359     {
360         File testDir = new File(getHtdocs(), getTestName());
361         File fileOnDisk = new File(testDir, file.getPath());
362         return fileOnDisk.exists();
363     }
364 
365     /***
366      * Test passed list were all found in the arc.
367      *
368      * If more or less found, test fails.
369      *
370      * @param files List of files to find in the arc.  No other files but these
371      * should be found in the arc.
372      */
373     public void testFilesInArc(List<File> files)
374     {
375         testFilesInArc(files, filesFoundInArc());
376     }
377     
378     /***
379      * Test passed list were all found in the arc.
380      *
381      * If more or less found, test fails.
382      *
383      * @param files List of files to find in the arc.  No other files but these
384      * should be found in the arc.
385      * @param foundFiles Files found in the arc.
386      */
387     public void testFilesInArc(List<File> files, List<File> foundFiles)
388     {
389         assertTrue("All files are on disk: " + files, filesExist(files));
390         assertTrue("All found: " + files + ", " + foundFiles,
391             foundFiles.containsAll(files));
392         assertTrue("Same size: " + files + ", " + foundFiles,
393             foundFiles.size() == files.size());
394     }
395 
396     /***
397      * Find all files that belong to this test that are mentioned in the arc.
398      * @return List of unique found file File objects.
399      */
400     protected List<File> filesFoundInArc() {
401         String baseURL = getSelftestURLWithTrailingSlash();
402         if (baseURL.endsWith(getTestName() + '/')) {
403             // URL may already end in the test name for case where we're
404             // running one test only.  If so, strip back the trailing '/'.
405             baseURL = baseURL.substring(0, baseURL.length() - 1);
406         } else {
407             baseURL += getTestName();
408         }
409         List [] metaDatas = getMetaDatas();
410         ARCRecordMetaData metaData = null;
411         List<File> filesFound = new ArrayList<File>();
412         for (int mdi = 0; mdi < metaDatas.length; mdi++) {
413         	List list = metaDatas[mdi];
414 			for (final Iterator i = list.iterator(); i.hasNext();) {
415 				metaData = (ARCRecordMetaData) i.next();
416 				String url = metaData.getUrl();
417 				if (url.startsWith(baseURL)
418 						&& metaData.getMimetype().equalsIgnoreCase("text/html")) {
419 					String fileName = url.substring(baseURL.length());
420 					if (fileName.startsWith("/")) {
421 						fileName = fileName.substring(1);
422 					}
423 					if (fileName != null && fileName.length() > 0) {
424 						File f = new File(fileName);
425 						if (!filesFound.contains(f)) {
426 							// Don't add duplicates.
427 							filesFound.add(new File(fileName));
428 						}
429 					}
430 				}
431 			}
432 		}
433         return filesFound;
434     }
435 }