View Javadoc

1   /* $Id: CheckpointSelfTest.java 4931 2007-02-21 18:48:17Z gojomo $
2    *
3    * Created Aug 15, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.selftest;
24  
25  import java.io.File;
26  import java.io.FileNotFoundException;
27  import java.io.IOException;
28  import java.util.logging.Logger;
29  
30  import javax.management.Attribute;
31  import javax.management.AttributeNotFoundException;
32  import javax.management.InvalidAttributeValueException;
33  import javax.management.MBeanException;
34  import javax.management.ReflectionException;
35  
36  import org.archive.crawler.admin.CrawlJob.MBeanCrawlController;
37  import org.archive.crawler.datamodel.Checkpoint;
38  import org.archive.crawler.datamodel.CrawlOrder;
39  import org.archive.crawler.datamodel.CrawlURI;
40  import org.archive.crawler.event.CrawlStatusListener;
41  import org.archive.crawler.event.CrawlURIDispositionListener;
42  import org.archive.crawler.framework.Checkpointer;
43  import org.archive.crawler.framework.CrawlController;
44  import org.archive.crawler.framework.exceptions.InitializationException;
45  import org.archive.crawler.settings.XMLSettingsHandler;
46  import org.archive.crawler.util.CheckpointUtils;
47  
48  
49  /***
50   * Assumes checkpoint was run during the SelfTest.
51   * @author stack
52   * @version $Date: 2007-02-21 18:48:17 +0000 (Wed, 21 Feb 2007) $ $Version$
53   */
54  public class CheckpointSelfTest extends SelfTestCase
55  implements CrawlStatusListener, CrawlURIDispositionListener {
56  	private final Logger LOG = Logger.getLogger(this.getClass().getName());
57  	private boolean crawlEnded = false;
58  
59  	public CheckpointSelfTest() {
60  		// TODO Auto-generated constructor stub
61  	}
62  
63  	public CheckpointSelfTest(String testName) {
64  		super(testName);
65  		// TODO Auto-generated constructor stub
66  	}
67  	
68  	/***
69  	 * Recover from the checkpoint made during selftest.
70  	 * @throws InitializationException 
71  	 * @throws IOException 
72  	 * @throws InvalidAttributeValueException 
73  	 * @throws ReflectionException 
74  	 * @throws MBeanException 
75  	 * @throws AttributeNotFoundException 
76  	 * @throws ClassNotFoundException 
77  	 * @throws InterruptedException 
78  	 */
79  	public void stestCheckpointRecover()
80  	throws InitializationException, IOException,
81  			InvalidAttributeValueException, AttributeNotFoundException,
82  			MBeanException, ReflectionException, ClassNotFoundException,
83  			InterruptedException {
84          assertInitialized();
85  		// Check checkpoint dir is in place.
86  		File f = getFile(getCrawlJobDir(), "checkpoints");
87  		// Use the first checkpoint in the dir.
88  		File cpdir = getFile(f, Checkpointer.formatCheckpointName("", 1));
89  		// Check valid checkpoint file is in place.
90  	    getFile(cpdir, Checkpoint.VALIDITY_STAMP_FILENAME);
91  	    // Get order file from checkpoint dir.
92  	    File order = getFile(cpdir, "order.xml");
93          XMLSettingsHandler handler =
94              new XMLSettingsHandler(order);
95          handler.initialize();
96          // Set recover-path to be this checkpoint dir.
97          handler.getOrder().setAttribute(
98          	new Attribute(CrawlOrder.ATTR_RECOVER_PATH, cpdir.toString()));
99          Checkpoint cp =
100         	CrawlController.getCheckpointRecover(handler.getOrder());
101         if (cp == null) {
102         	throw new NullPointerException("Failed read of checkpoint object");
103         }
104         CrawlController c = (MBeanCrawlController)CheckpointUtils.
105         	readObjectFromFile(MBeanCrawlController.class, cpdir);
106         c.initialize(handler);
107         c.addCrawlStatusListener(this);
108         c.addCrawlURIDispositionListener(this);
109         c.requestCrawlStart();
110         LOG.info("Recover from selftest crawl started using " +
111             order.toString() + ".");
112         // Wait here a while till its up and running?
113         while(!this.crawlEnded) {
114         	LOG.info("Waiting on recovered crawl to finish");
115         	Thread.sleep(1000);
116         }
117 	}
118 	
119 	private File getFile(final File parent, final String name)
120 	throws IOException {
121 		File f = new File(parent, name);
122 		if (!f.exists()) {
123 			throw new FileNotFoundException(f.getAbsolutePath());
124 		}
125 		if (!f.canRead()) {
126 			throw new IOException("Can't read " + f.getAbsolutePath());
127 		}
128 		return f;
129 	}
130 
131 	public void crawlCheckpoint(File checkpointDir) throws Exception {
132 		// TODO Auto-generated method stub
133 		
134 	}
135 
136 	public void crawlEnded(String sExitMessage) {
137 		this.crawlEnded = true;
138 	}
139 
140 	public void crawlEnding(String sExitMessage) {
141 		// TODO Auto-generated method stub
142 		
143 	}
144 
145 	public void crawlPaused(String statusMessage) {
146 		// TODO Auto-generated method stub
147 		
148 	}
149 
150 	public void crawlPausing(String statusMessage) {
151 		// TODO Auto-generated method stub
152 		
153 	}
154 
155 	public void crawlResuming(String statusMessage) {
156 		// TODO Auto-generated method stub
157 		
158 	}
159 
160 	public void crawlStarted(String message) {
161 		// TODO Auto-generated method stub
162 		
163 	}
164 
165 	public void crawledURIDisregard(CrawlURI curi) {
166 		// TODO Auto-generated method stub
167 		
168 	}
169 
170 	public void crawledURIFailure(CrawlURI curi) {
171 		// TODO Auto-generated method stub
172 		
173 	}
174 
175 	public void crawledURINeedRetry(CrawlURI curi) {
176 		// TODO Auto-generated method stub
177 		
178 	}
179 
180 	public void crawledURISuccessful(CrawlURI curi) {
181 		LOG.info(curi.toString());
182 	}
183 }