1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.selftest;
24
25
26 import java.io.File;
27 import java.io.FileNotFoundException;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Iterator;
31 import java.util.List;
32
33 import javax.management.AttributeNotFoundException;
34 import javax.management.MBeanException;
35 import javax.management.ReflectionException;
36
37 import junit.framework.TestCase;
38
39 import org.archive.crawler.admin.CrawlJob;
40 import org.archive.crawler.datamodel.CrawlOrder;
41 import org.archive.crawler.settings.ComplexType;
42 import org.archive.crawler.settings.StringList;
43 import org.archive.crawler.writer.ARCWriterProcessor;
44 import org.archive.io.arc.ARCReader;
45 import org.archive.io.arc.ARCReaderFactory;
46 import org.archive.io.arc.ARCRecordMetaData;
47 import org.archive.util.FileUtils;
48
49
50 /***
51 * Base class for integrated selftest unit tests.
52 *
53 * Has utility for integrated selftest such as location of selftest generated
54 * arc file.
55 *
56 * @author stack
57 * @version $Id: SelfTestCase.java 4931 2007-02-21 18:48:17Z gojomo $
58 */
59 public abstract class SelfTestCase extends TestCase
60 {
61 /***
62 * Suffix for selftest classes.
63 */
64 protected static final String SELFTEST = "SelfTest";
65
66 private static CrawlJob crawlJob = null;
67 private static File crawlJobDir = null;
68 private static File [] arcFile = null;
69 private static String selftestURL = null;
70
71 /***
72 * Directory logs are kept in.
73 */
74 private static File logsDir = null;
75
76 /***
77 * Has the static initializer for this class been run.
78 */
79 private static boolean initialized = false;
80
81 /***
82 * The selftest webapp htdocs directory.
83 */
84 private static File htdocs = null;
85
86
87 /***
88 * A reference to an ARCReader on which the validate method has been called.
89 * Can be used to walk the metadata.
90 *
91 * @see org.archive.io.arc.ARCReader#validate()
92 */
93 private static ARCReader [] readReader = null;
94
95 /***
96 * Metadata list from the arc reader.
97 *
98 * Gotten as byproduct of calling validate on the arcreader.
99 */
100 private static List [] metaDatas;
101
102
103 public SelfTestCase()
104 {
105 super();
106 }
107
108 public SelfTestCase(String testName)
109 {
110 super(testName);
111 }
112
113 public void testNothing() {
114
115
116 }
117
118 public void assertInitialized() {
119 assertTrue("SelfTestCase.initialize() not called " +
120 "before running selftest.", initialized);
121 }
122
123 /***
124 * Test non null and not empty.
125 *
126 * @param str String to test.
127 * @return The passed string.
128 * @throws IllegalArgumentException if null or empty string.
129 */
130 protected static void assertNonEmpty(String str) {
131 assertTrue("String "+str+" is empty",str.length()>0);
132 }
133
134 /***
135 * Test nonull and exits.
136 *
137 * @param file File to test.
138 * @return Passed file.
139 * @throws FileNotFoundException passed file doesn't exist.
140 */
141 protected static void assertExists(File file) {
142 assertTrue("File "+file+" doesn't exist",file.exists());
143 }
144
145 /***
146 * Static initializer.
147 *
148 * Must be called before instantiation of any tests based off this class.
149 *
150 * @param url URL to selftest webapp.
151 * @param job The selftest crawl job.
152 * @param jobDir Job output directory. Has the seed file, the order file
153 * and logs.
154 * @param docs Expanded webapp directory location.
155 *
156 * @throws IOException if nonexistent directories passed.
157 */
158 public static synchronized void initialize(final String url,
159 final CrawlJob job, final File jobDir, final File docs)
160 throws IOException, AttributeNotFoundException, MBeanException,
161 ReflectionException, InterruptedException
162 {
163 assertNotNull(url);
164 assertNonEmpty(url);
165 SelfTestCase.selftestURL = url.endsWith("/")? url: url + "/";
166
167 assertNotNull(job);
168 SelfTestCase.crawlJob = job;
169
170 assertNotNull(jobDir);
171 assertExists(jobDir);
172 SelfTestCase.crawlJobDir = jobDir;
173
174 assertNotNull(docs);
175 assertExists(docs);
176 SelfTestCase.htdocs = docs;
177
178
179
180
181 CrawlOrder crawlOrder =job.getSettingsHandler().getOrder();
182 assertNotNull(crawlOrder);
183
184 String diskPath = (String)crawlOrder.
185 getAttribute(null, CrawlOrder.ATTR_DISK_PATH);
186 if (diskPath != null && diskPath.length() > 0 &&
187 diskPath.startsWith(File.separator)) {
188 SelfTestCase.logsDir = new File(diskPath);
189 } else {
190 SelfTestCase.logsDir =
191 (diskPath != null && diskPath.length() > 0)?
192 new File(jobDir, diskPath): jobDir;
193 }
194 assertNotNull(SelfTestCase.logsDir);
195 assertExists(SelfTestCase.logsDir);
196
197
198
199 ComplexType arcWriterProcessor =
200 crawlOrder.getSettingsHandler().getModule("Archiver");
201 String arcDirStr = (String)((StringList)arcWriterProcessor.
202 getAttribute(ARCWriterProcessor.ATTR_PATH)).get(0);
203 File arcDir = null;
204 if (arcDirStr != null && arcDirStr.length() > 0 &&
205 arcDirStr.startsWith(File.separator)) {
206 arcDir = new File(arcDirStr);
207 } else {
208 arcDir = (arcDirStr != null && arcDirStr.length() > 0)?
209 new File(SelfTestCase.logsDir, arcDirStr): SelfTestCase.logsDir;
210 }
211 assertNotNull(arcDir);
212 assertExists(arcDir);
213
214 String prefix = ((String)arcWriterProcessor.
215 getAttribute(ARCWriterProcessor.ATTR_PREFIX));
216 assertNotNull(prefix);
217 assertNonEmpty(prefix);
218
219 File [] arcs = FileUtils.getFilesWithPrefix(arcDir, prefix);
220
221
222
223
224
225
226 SelfTestCase.readReader = new ARCReader[arcs.length];
227 SelfTestCase.arcFile = new File[arcs.length];
228 SelfTestCase.metaDatas = new List[arcs.length];
229 for (int i = 0; i < arcs.length; i++) {
230 File f = arcs[i];
231 SelfTestCase.arcFile[i] = f;
232 SelfTestCase.readReader[i] = ARCReaderFactory.get(f);
233 SelfTestCase.metaDatas[i] = SelfTestCase.readReader[i].validate();
234 }
235 SelfTestCase.initialized = true;
236 }
237
238 /***
239 * @return Returns the arcDir.
240 */
241 protected static File [] getArcFiles() {
242 return arcFile;
243 }
244
245 /***
246 * @return Returns the jobDir.
247 */
248 protected static File getCrawlJobDir()
249 {
250 return SelfTestCase.crawlJobDir;
251 }
252
253 /***
254 * @return Return the directory w/ logs in it.
255 */
256 protected static File getLogsDir()
257 {
258 return SelfTestCase.logsDir;
259 }
260
261 /***
262 * Returns the selftest read ARCReader.
263 *
264 * The returned ARCReader has been validated. Use it to get at metadata.
265 *
266 * @return Returns the readReader, an ARCReader that has been validated.
267 */
268 protected static ARCReader [] getReadReaders() {
269 return SelfTestCase.readReader;
270 }
271
272 /***
273 * @return Returns list of ARCReader metadatas, the byproduct of calling
274 * validate.
275 */
276 protected static List [] getMetaDatas() {
277 return SelfTestCase.metaDatas;
278 }
279
280 /***
281 * @return Returns the selftestURL.
282 */
283 public static String getSelftestURL()
284 {
285 return SelfTestCase.selftestURL;
286 }
287
288 /***
289 * @return Returns the selftestURL. URL returned is guaranteed to have
290 * a trailing '/'.
291 */
292 public static String getSelftestURLWithTrailingSlash()
293 {
294 return selftestURL.endsWith("/")? selftestURL: selftestURL + "/";
295 }
296
297 /***
298 * Calculates test name by stripping SelfTest from current class name.
299 *
300 * @return The name of the test.
301 */
302 public String getTestName()
303 {
304 String classname = getClass().getName();
305 int selftestIndex = classname.indexOf(SELFTEST);
306 assertTrue("Class name ends with SelfTest", selftestIndex > 0);
307 int lastDotIndex = classname.lastIndexOf('.');
308 assertTrue("Package dot in unexpected location",
309 lastDotIndex + 1 < classname.length() && lastDotIndex > 0);
310 return classname.substring(lastDotIndex + 1, selftestIndex);
311 }
312
313 /***
314 * @return Returns the selftest webappDir.
315 */
316 public static File getHtdocs()
317 {
318 return SelfTestCase.htdocs;
319 }
320
321 /***
322 * @return Returns the crawlJob.
323 */
324 public static CrawlJob getCrawlJob()
325 {
326 return crawlJob;
327 }
328
329 /***
330 * Confirm passed files exist on disk under the test directory.
331 *
332 * @param files Files to test for existence under the test's directory.
333 * @return true if all files exist on disk.
334 */
335 public boolean filesExist(List files)
336 {
337 boolean result = true;
338 for (Iterator i = files.iterator(); i.hasNext();)
339 {
340 if (!fileExists((File)i.next()))
341 {
342 result = false;
343 break;
344 }
345 }
346 return result;
347 }
348
349 /***
350 * Confirm passed file exists on disk under the test directory.
351 *
352 * This method takes care of building up the file path under the selftest
353 * webapp. Just pass the file name.
354 *
355 * @param file Name of file to look for.
356 * @return True if file exists.
357 */
358 public boolean fileExists(File file)
359 {
360 File testDir = new File(getHtdocs(), getTestName());
361 File fileOnDisk = new File(testDir, file.getPath());
362 return fileOnDisk.exists();
363 }
364
365 /***
366 * Test passed list were all found in the arc.
367 *
368 * If more or less found, test fails.
369 *
370 * @param files List of files to find in the arc. No other files but these
371 * should be found in the arc.
372 */
373 public void testFilesInArc(List<File> files)
374 {
375 testFilesInArc(files, filesFoundInArc());
376 }
377
378 /***
379 * Test passed list were all found in the arc.
380 *
381 * If more or less found, test fails.
382 *
383 * @param files List of files to find in the arc. No other files but these
384 * should be found in the arc.
385 * @param foundFiles Files found in the arc.
386 */
387 public void testFilesInArc(List<File> files, List<File> foundFiles)
388 {
389 assertTrue("All files are on disk: " + files, filesExist(files));
390 assertTrue("All found: " + files + ", " + foundFiles,
391 foundFiles.containsAll(files));
392 assertTrue("Same size: " + files + ", " + foundFiles,
393 foundFiles.size() == files.size());
394 }
395
396 /***
397 * Find all files that belong to this test that are mentioned in the arc.
398 * @return List of unique found file File objects.
399 */
400 protected List<File> filesFoundInArc() {
401 String baseURL = getSelftestURLWithTrailingSlash();
402 if (baseURL.endsWith(getTestName() + '/')) {
403
404
405 baseURL = baseURL.substring(0, baseURL.length() - 1);
406 } else {
407 baseURL += getTestName();
408 }
409 List [] metaDatas = getMetaDatas();
410 ARCRecordMetaData metaData = null;
411 List<File> filesFound = new ArrayList<File>();
412 for (int mdi = 0; mdi < metaDatas.length; mdi++) {
413 List list = metaDatas[mdi];
414 for (final Iterator i = list.iterator(); i.hasNext();) {
415 metaData = (ARCRecordMetaData) i.next();
416 String url = metaData.getUrl();
417 if (url.startsWith(baseURL)
418 && metaData.getMimetype().equalsIgnoreCase("text/html")) {
419 String fileName = url.substring(baseURL.length());
420 if (fileName.startsWith("/")) {
421 fileName = fileName.substring(1);
422 }
423 if (fileName != null && fileName.length() > 0) {
424 File f = new File(fileName);
425 if (!filesFound.contains(f)) {
426
427 filesFound.add(new File(fileName));
428 }
429 }
430 }
431 }
432 }
433 return filesFound;
434 }
435 }