1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.selftest;
24
25 import java.io.File;
26 import java.io.FileNotFoundException;
27 import java.io.IOException;
28 import java.util.logging.Logger;
29
30 import javax.management.Attribute;
31 import javax.management.AttributeNotFoundException;
32 import javax.management.InvalidAttributeValueException;
33 import javax.management.MBeanException;
34 import javax.management.ReflectionException;
35
36 import org.archive.crawler.admin.CrawlJob.MBeanCrawlController;
37 import org.archive.crawler.datamodel.Checkpoint;
38 import org.archive.crawler.datamodel.CrawlOrder;
39 import org.archive.crawler.datamodel.CrawlURI;
40 import org.archive.crawler.event.CrawlStatusListener;
41 import org.archive.crawler.event.CrawlURIDispositionListener;
42 import org.archive.crawler.framework.Checkpointer;
43 import org.archive.crawler.framework.CrawlController;
44 import org.archive.crawler.framework.exceptions.InitializationException;
45 import org.archive.crawler.settings.XMLSettingsHandler;
46 import org.archive.crawler.util.CheckpointUtils;
47
48
49 /***
50 * Assumes checkpoint was run during the SelfTest.
51 * @author stack
52 * @version $Date: 2007-02-21 18:48:17 +0000 (Wed, 21 Feb 2007) $ $Version$
53 */
54 public class CheckpointSelfTest extends SelfTestCase
55 implements CrawlStatusListener, CrawlURIDispositionListener {
56 private final Logger LOG = Logger.getLogger(this.getClass().getName());
57 private boolean crawlEnded = false;
58
59 public CheckpointSelfTest() {
60
61 }
62
63 public CheckpointSelfTest(String testName) {
64 super(testName);
65
66 }
67
68 /***
69 * Recover from the checkpoint made during selftest.
70 * @throws InitializationException
71 * @throws IOException
72 * @throws InvalidAttributeValueException
73 * @throws ReflectionException
74 * @throws MBeanException
75 * @throws AttributeNotFoundException
76 * @throws ClassNotFoundException
77 * @throws InterruptedException
78 */
79 public void stestCheckpointRecover()
80 throws InitializationException, IOException,
81 InvalidAttributeValueException, AttributeNotFoundException,
82 MBeanException, ReflectionException, ClassNotFoundException,
83 InterruptedException {
84 assertInitialized();
85
86 File f = getFile(getCrawlJobDir(), "checkpoints");
87
88 File cpdir = getFile(f, Checkpointer.formatCheckpointName("", 1));
89
90 getFile(cpdir, Checkpoint.VALIDITY_STAMP_FILENAME);
91
92 File order = getFile(cpdir, "order.xml");
93 XMLSettingsHandler handler =
94 new XMLSettingsHandler(order);
95 handler.initialize();
96
97 handler.getOrder().setAttribute(
98 new Attribute(CrawlOrder.ATTR_RECOVER_PATH, cpdir.toString()));
99 Checkpoint cp =
100 CrawlController.getCheckpointRecover(handler.getOrder());
101 if (cp == null) {
102 throw new NullPointerException("Failed read of checkpoint object");
103 }
104 CrawlController c = (MBeanCrawlController)CheckpointUtils.
105 readObjectFromFile(MBeanCrawlController.class, cpdir);
106 c.initialize(handler);
107 c.addCrawlStatusListener(this);
108 c.addCrawlURIDispositionListener(this);
109 c.requestCrawlStart();
110 LOG.info("Recover from selftest crawl started using " +
111 order.toString() + ".");
112
113 while(!this.crawlEnded) {
114 LOG.info("Waiting on recovered crawl to finish");
115 Thread.sleep(1000);
116 }
117 }
118
119 private File getFile(final File parent, final String name)
120 throws IOException {
121 File f = new File(parent, name);
122 if (!f.exists()) {
123 throw new FileNotFoundException(f.getAbsolutePath());
124 }
125 if (!f.canRead()) {
126 throw new IOException("Can't read " + f.getAbsolutePath());
127 }
128 return f;
129 }
130
131 public void crawlCheckpoint(File checkpointDir) throws Exception {
132
133
134 }
135
136 public void crawlEnded(String sExitMessage) {
137 this.crawlEnded = true;
138 }
139
140 public void crawlEnding(String sExitMessage) {
141
142
143 }
144
145 public void crawlPaused(String statusMessage) {
146
147
148 }
149
150 public void crawlPausing(String statusMessage) {
151
152
153 }
154
155 public void crawlResuming(String statusMessage) {
156
157
158 }
159
160 public void crawlStarted(String message) {
161
162
163 }
164
165 public void crawledURIDisregard(CrawlURI curi) {
166
167
168 }
169
170 public void crawledURIFailure(CrawlURI curi) {
171
172
173 }
174
175 public void crawledURINeedRetry(CrawlURI curi) {
176
177
178 }
179
180 public void crawledURISuccessful(CrawlURI curi) {
181 LOG.info(curi.toString());
182 }
183 }