View Javadoc

1   /* SelftestCrawlJobHandler
2    *
3    * Created on Feb 4, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.selftest;
24  
25  import java.io.File;
26  import java.util.ArrayList;
27  import java.util.Date;
28  import java.util.List;
29  import java.util.logging.Logger;
30  
31  import junit.framework.Test;
32  import junit.framework.TestResult;
33  
34  import org.archive.crawler.Heritrix;
35  import org.archive.crawler.admin.CrawlJob;
36  import org.archive.crawler.admin.CrawlJobHandler;
37  import org.archive.crawler.datamodel.CrawlURI;
38  import org.archive.crawler.event.CrawlURIDispositionListener;
39  
40  
41  /***
42   * An override to gain access to end-of-crawljob message.
43   *
44   *
45   * @author stack
46   * @version $Id: SelfTestCrawlJobHandler.java 4667 2006-09-26 20:38:48Z paul_jack $
47   */
48  
49  public class SelfTestCrawlJobHandler extends CrawlJobHandler
50  implements CrawlURIDispositionListener {
51      /***
52       * Name of the selftest webapp.
53       */
54      private static final String SELFTEST_WEBAPP = "selftest";
55  
56      private static Logger logger =
57          Logger.getLogger("org.archive.crawler.admin.SelftestCrawlJobHandler");
58  
59      /***
60       * Name of selftest to run.
61       *
62       * If set, run this test only.  Otherwise run them all.
63       */
64      private String selfTestName = null;
65      
66      private String selfTestUrl = null;
67  
68  
69      private SelfTestCrawlJobHandler() {
70          this(null, null, null);
71      }
72  
73      public SelfTestCrawlJobHandler(final File jobsDir,
74              final String selfTestName, final String url) {
75          // No need to load jobs or profiles
76          super(jobsDir, false, false);
77          this.selfTestName = selfTestName;
78          this.selfTestUrl = url;
79      }
80      
81      @Override
82      public void crawlStarted(String message) {
83      	super.crawlStarted(message);
84      	this.getCurrentJob().getController().
85      		addCrawlURIDispositionListener(this);
86      }
87  
88      public void crawlEnded(String sExitMessage)  {
89          TestResult result = null;
90          try {
91              super.crawlEnded(sExitMessage);
92  
93              // At crawlEnded time, there is no current job.  Get the selftest
94              // job by pulling from the completedCrawlJobs queue.
95              List completedCrawlJobs = getCompletedJobs();
96              if (completedCrawlJobs == null || completedCrawlJobs.size() <= 0) {
97                  logger.severe("Selftest job did not complete.");
98              } else {
99                  CrawlJob job = (CrawlJob)completedCrawlJobs.
100                     get(completedCrawlJobs.size()-1);
101                 Test test = null;
102                 if (this.selfTestName != null &&
103                         this.selfTestName.length() > 0) {
104                     // Run single selftest only.
105                     // Get class for the passed single selftest.
106                     // Assume test to run is in this package.
107                     String thisClassName = this.getClass().getName();
108                     String pkg = thisClassName.
109                         substring(0, thisClassName.lastIndexOf('.'));
110                     // All selftests end in 'SelfTest'.
111                     String selftestClass = pkg + '.' + this.selfTestName +
112                         "SelfTest";
113                     // Need to make a list.  Make an array first.
114                     List<Class<?>> classList = new ArrayList<Class<?>>();
115                     classList.add(Class.forName(selftestClass));
116                     test = AllSelfTestCases.suite(this.selfTestUrl,
117                         job, job.getDirectory(), Heritrix.getHttpServer().
118                         getWebappPath(SELFTEST_WEBAPP), classList);
119                 } else {
120                     // Run all tests.
121                     test = AllSelfTestCases.suite(this.selfTestUrl,
122                         job, job.getDirectory(), Heritrix.getHttpServer().
123                         getWebappPath(SELFTEST_WEBAPP));
124                 }
125                 result = junit.textui.TestRunner.run(test);
126             }
127         } catch (Exception e) {
128             logger.info("Failed running selftest analysis: " + e.getMessage());
129         } finally  {
130             // TODO: This technique where I'm calling shutdown directly means
131             // we bypass the running of other crawlended handlers.  Means
132             // that such as the statistics tracker have no chance to run so
133             // reports are never generated.  Fix -- but preserve 0 or 1 exit
134             // code.
135             logger.info((new Date()).toString() + " Selftest " +
136                 (result != null && result.wasSuccessful()? "PASSED": "FAILED"));
137             stop();
138             Heritrix.shutdown(((result !=  null) && result.wasSuccessful())?
139                 0: 1);
140         }
141     }
142 
143 	public void crawledURIDisregard(CrawlURI curi) {
144 		// TODO Auto-generated method stub
145 	}
146 
147 	public void crawledURIFailure(CrawlURI curi) {
148 		// TODO Auto-generated method stub
149 	}
150 
151 	public void crawledURINeedRetry(CrawlURI curi) {
152 		// TODO Auto-generated method stub
153 	}
154 
155 	public void crawledURISuccessful(CrawlURI curi) {
156 		// If curi ends in 'Checkpoint/index.html', then run a Checkpoint.
157 		if (curi.toString().endsWith("/Checkpoint/")) {
158 			this.getCurrentJob().getController().requestCrawlCheckpoint();
159 		}
160 	}
161 }