1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.io.arc;
26
27 import java.io.BufferedInputStream;
28 import java.io.ByteArrayInputStream;
29 import java.io.ByteArrayOutputStream;
30 import java.io.File;
31 import java.io.FileNotFoundException;
32 import java.io.IOException;
33 import java.io.InputStream;
34 import java.io.PrintStream;
35 import java.util.Arrays;
36 import java.util.Date;
37 import java.util.Iterator;
38 import java.util.List;
39 import java.util.concurrent.atomic.AtomicInteger;
40
41 import org.apache.commons.io.IOUtils;
42 import org.apache.commons.io.input.NullInputStream;
43 import org.apache.commons.io.output.NullOutputStream;
44 import org.archive.io.ArchiveRecord;
45 import org.archive.io.ReplayInputStream;
46 import org.archive.io.WriterPoolMember;
47 import org.archive.util.ArchiveUtils;
48 import org.archive.util.FileUtils;
49 import org.archive.util.TmpDirTestCase;
50
51
/**
 * Tests the ARCWriter class.
 *
 * This code exercises both ARCWriter and ARCReader: it first writes ARCs
 * with ARCWriter, then validates what was written with ARCReader.
 *
 * @author stack
 */
60 public class ARCWriterTest
61 extends TmpDirTestCase implements ARCConstants {
62 /*** Utility class for writing bad ARCs (with trailing junk)
63 */
64 public class CorruptibleARCWriter extends ARCWriter {
65 byte[] endJunk = null;
66 public CorruptibleARCWriter(AtomicInteger serial_no, List<File> name, String name2, boolean compress, long default_max_arc_file_size) {
67 super(serial_no,name,name2,compress,default_max_arc_file_size);
68 }
69 @Override
70 protected void postWriteRecordTasks() throws IOException {
71 if(endJunk!=null) {
72 this.write(endJunk);
73 }
74 super.postWriteRecordTasks();
75 }
76 public void setEndJunk(byte[] b) throws IOException {
77 this.endJunk = b;
78 }
79 }
80
81 /***
82 * Prefix to use for ARC files made by JUNIT.
83 */
84 private static final String SUFFIX =
85
86
87 private static final String SOME_URL = "http://www.archive.org/test/";
88
89
90 private static final AtomicInteger SERIAL_NO = new AtomicInteger();
91
92
93
94
95 protected void setUp() throws Exception {
96 super.setUp();
97 }
98
99
100
101
102 protected void tearDown() throws Exception {
103 super.tearDown();
104 }
105
106 protected static String getContent() {
107 return getContent(null);
108 }
109
110 protected static String getContent(String indexStr) {
111 String page = (indexStr != null)? "Page #" + indexStr: "Some Page";
112 return "HTTP/1.1 200 OK\r\n" +
113 "Content-Type: text/html\r\n\r\n" +
114 "<html><head><title>" + page +
115 "</title></head>" +
116 "<body>" + page +
117 "</body></html>";
118 }
119
120 protected int writeRandomHTTPRecord(ARCWriter arcWriter, int index)
121 throws IOException {
122 String indexStr = Integer.toString(index);
123 ByteArrayOutputStream baos = new ByteArrayOutputStream();
124
125 String now = ArchiveUtils.get14DigitDate();
126 int recordLength = 0;
127 byte[] record = (getContent(indexStr)).getBytes();
128 recordLength += record.length;
129 baos.write(record);
130
131 baos.write("\n".getBytes());
132 recordLength += 1;
133 arcWriter.write("http://www.one.net/id=" + indexStr, "text/html",
134 "0.1.2.3", Long.parseLong(now), recordLength, baos);
135 return recordLength;
136 }
137
138 private File writeRecords(String baseName, boolean compress,
139 long maxSize, int recordCount)
140 throws IOException {
141 cleanUpOldFiles(baseName);
142 File [] files = {getTmpDir()};
143 ARCWriter arcWriter = new ARCWriter(SERIAL_NO, Arrays.asList(files),
144 baseName + '-' + SUFFIX, compress, maxSize);
145 assertNotNull(arcWriter);
146 for (int i = 0; i < recordCount; i++) {
147 writeRandomHTTPRecord(arcWriter, i);
148 }
149 arcWriter.close();
150 assertTrue("Doesn't exist: " +
151 arcWriter.getFile().getAbsolutePath(),
152 arcWriter.getFile().exists());
153 return arcWriter.getFile();
154 }
155
156 private void validate(File arcFile, int recordCount)
157 throws FileNotFoundException, IOException {
158 ARCReader reader = ARCReaderFactory.get(arcFile);
159 assertNotNull(reader);
160 List metaDatas = null;
161 if (recordCount == -1) {
162 metaDatas = reader.validate();
163 } else {
164 metaDatas = reader.validate(recordCount);
165 }
166 reader.close();
167
168
169
170 reader = ARCReaderFactory.get(arcFile);
171 for (int i = metaDatas.size() - 1; i >= 0; i--) {
172 ARCRecordMetaData meta = (ARCRecordMetaData)metaDatas.get(i);
173 ArchiveRecord r = reader.get(meta.getOffset());
174 String mimeType = r.getHeader().getMimetype();
175 assertTrue("Record is bogus",
176 mimeType != null && mimeType.length() > 0);
177 }
178 reader.close();
179 assertTrue("Metadatas not equal", metaDatas.size() == recordCount);
180 for (Iterator i = metaDatas.iterator(); i.hasNext();) {
181 ARCRecordMetaData r = (ARCRecordMetaData)i.next();
182 assertTrue("Record is empty", r.getLength() > 0);
183 }
184 }
185
186 public void testCheckARCFileSize()
187 throws IOException {
188 runCheckARCFileSizeTest("checkARCFileSize", false);
189 }
190
191 public void testCheckARCFileSizeCompressed()
192 throws IOException {
193 runCheckARCFileSizeTest("checkARCFileSize", true);
194 }
195
196 public void testWriteRecord() throws IOException {
197 final int recordCount = 2;
198 File arcFile = writeRecords("writeRecord", false,
199 DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
200 validate(arcFile, recordCount + 1);
201 }
202
203 public void testRandomAccess() throws IOException {
204 final int recordCount = 3;
205 File arcFile = writeRecords("writeRecord", true,
206 DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
207 ARCReader reader = ARCReaderFactory.get(arcFile);
208
209 boolean readFirst = false;
210 String url = null;
211 long offset = -1;
212 long totalRecords = 0;
213 boolean readSecond = false;
214 for (final Iterator i = reader.iterator(); i.hasNext(); totalRecords++) {
215 ARCRecord ar = (ARCRecord)i.next();
216 if (!readFirst) {
217 readFirst = true;
218 continue;
219 }
220 if (!readSecond) {
221 url = ar.getMetaData().getUrl();
222 offset = ar.getMetaData().getOffset();
223 readSecond = true;
224 }
225 }
226
227 reader = ARCReaderFactory.get(arcFile, offset);
228 ArchiveRecord ar = reader.get();
229 assertEquals(ar.getHeader().getUrl(), url);
230 ar.close();
231
232
233 reader = ARCReaderFactory.get(arcFile, offset);
234 int count = 0;
235 for (final Iterator i = reader.iterator(); i.hasNext(); i.next()) {
236 count++;
237 }
238 reader.close();
239 assertEquals(totalRecords - 1, count);
240 }
241
242 public void testWriteRecordCompressed() throws IOException {
243 final int recordCount = 2;
244 File arcFile = writeRecords("writeRecordCompressed", true,
245 DEFAULT_MAX_ARC_FILE_SIZE, recordCount);
246 validate(arcFile, recordCount + 1
247 }
248
249 public void testWriteGiantRecord() throws IOException {
250 File [] files = {getTmpDir()};
251 PrintStream dummyStream = new PrintStream(new NullOutputStream());
252 ARCWriter arcWriter = new ARCWriter(SERIAL_NO, dummyStream,
253 new File("dummy"),
254 false, null, null);
255 assertNotNull(arcWriter);
256
257
258 long now = System.currentTimeMillis();
259 long recordLength = org.apache.commons.io.FileUtils.ONE_GB * 3;
260
261 arcWriter.write("dummy:uri", "application/octet-stream",
262 "0.1.2.3", now, recordLength, new NullInputStream(recordLength));
263 arcWriter.close();
264 }
265
266 private void runCheckARCFileSizeTest(String baseName, boolean compress)
267 throws FileNotFoundException, IOException {
268 writeRecords(baseName, compress, 1024, 15);
269
270 File [] files = FileUtils.getFilesWithPrefix(getTmpDir(), SUFFIX);
271 for (int i = 0; i < files.length; i++) {
272 validate(files[i], -1);
273 }
274 }
275
276 protected CorruptibleARCWriter createARCWriter(String NAME, boolean compress) {
277 File [] files = {getTmpDir()};
278 return new CorruptibleARCWriter(SERIAL_NO, Arrays.asList(files), NAME,
279 compress, DEFAULT_MAX_ARC_FILE_SIZE);
280 }
281
282 protected static ByteArrayInputStream getBais(String str)
283 throws IOException {
284 return new ByteArrayInputStream(str.getBytes());
285 }
286
287 /***
288 * Writes a record, suppressing normal length-checks (so that
289 * intentionally malformed records may be written).
290 */
291 protected static void writeRecord(ARCWriter writer, String url,
292 String type, int len, ByteArrayInputStream bais)
293 throws IOException {
294 writer.write(url, type, "192.168.1.1", (new Date()).getTime(), len,
295 bais, false);
296 }
297
298 protected int iterateRecords(ARCReader r)
299 throws IOException {
300 int count = 0;
301 for (Iterator i = r.iterator(); i.hasNext();) {
302 ARCRecord rec = (ARCRecord)i.next();
303 rec.close();
304 if (count != 0) {
305 assertTrue("Unexpected URL " + rec.getMetaData().getUrl(),
306 rec.getMetaData().getUrl().equals(SOME_URL));
307 }
308 count++;
309 }
310 return count;
311 }
312
313 protected CorruptibleARCWriter createArcWithOneRecord(String name,
314 boolean compressed)
315 throws IOException {
316 CorruptibleARCWriter writer = createARCWriter(name, compressed);
317 String content = getContent();
318 writeRecord(writer, SOME_URL, "text/html",
319 content.length(), getBais(content));
320 return writer;
321 }
322
323 public void testSpaceInURL() {
324 String eMessage = null;
325 try {
326 holeyUrl("testSpaceInURL-" + SUFFIX, false, " ");
327 } catch (IOException e) {
328 eMessage = e.getMessage();
329 }
330 assertTrue("Didn't get expected exception: " + eMessage,
331 eMessage.startsWith("Metadata line doesn't match"));
332 }
333
334 public void testTabInURL() {
335 String eMessage = null;
336 try {
337 holeyUrl("testTabInURL-" + SUFFIX, false, "\t");
338 } catch (IOException e) {
339 eMessage = e.getMessage();
340 }
341 assertTrue("Didn't get expected exception: " + eMessage,
342 eMessage.startsWith("Metadata line doesn't match"));
343 }
344
345 protected void holeyUrl(String name, boolean compress, String urlInsert)
346 throws IOException {
347 ARCWriter writer = createArcWithOneRecord(name, compress);
348
349 String content = getContent();
350 writeRecord(writer, SOME_URL + urlInsert + "/index.html", "text/html",
351 content.length(), getBais(content));
352 writer.close();
353 }
354
355
356
357
358
359
360
361 public void testLengthTooShortCompressed() throws IOException {
362 lengthTooShort("testLengthTooShortCompressed-" + SUFFIX, true, false);
363 }
364
365 public void testLengthTooShortCompressedStrict()
366 throws IOException {
367 String eMessage = null;
368 try {
369 lengthTooShort("testLengthTooShortCompressedStrict-" + SUFFIX,
370 true, true);
371 } catch (RuntimeException e) {
372 eMessage = e.getMessage();
373 }
374 assertTrue("Didn't get expected exception: " + eMessage,
375 eMessage.startsWith("java.io.IOException: Record ENDING at"));
376 }
377
378 protected void lengthTooShort(String name, boolean compress, boolean strict)
379 throws IOException {
380 CorruptibleARCWriter writer = createArcWithOneRecord(name, compress);
381
382 String content = getContent();
383 ByteArrayInputStream bais = getBais(content+"SOME TRAILING BYTES");
384 writeRecord(writer, SOME_URL, "text/html",
385 content.length(), bais);
386 writer.setEndJunk("SOME TRAILING BYTES".getBytes());
387 writeRecord(writer, SOME_URL, "text/html",
388 content.length(), getBais(content));
389 writer.close();
390
391
392 ByteArrayOutputStream os = new ByteArrayOutputStream();
393 System.setErr(new PrintStream(os));
394
395 ARCReader r = ARCReaderFactory.get(writer.getFile());
396 r.setStrict(strict);
397 int count = iterateRecords(r);
398 assertTrue("Count wrong " + count, count == 4);
399
400
401
402 String err = os.toString();
403 assertTrue("No message " + err, err.startsWith("WARNING") &&
404 (err.indexOf("Record ENDING at") > 0));
405 }
406
407
408
409
410
411
412
413
414
415 public void testLengthTooLongCompressed()
416 throws IOException {
417 lengthTooLong("testLengthTooLongCompressed-" + SUFFIX,
418 true, false);
419 }
420
421 public void testLengthTooLongCompressedStrict() {
422 String eMessage = null;
423 try {
424 lengthTooLong("testLengthTooLongCompressed-" + SUFFIX,
425 true, true);
426 } catch (IOException e) {
427 eMessage = e.getMessage();
428 }
429 assertTrue("Didn't get expected exception: " + eMessage,
430 eMessage.startsWith("Premature EOF before end-of-record"));
431 }
432
433 protected void lengthTooLong(String name, boolean compress,
434 boolean strict)
435 throws IOException {
436 ARCWriter writer = createArcWithOneRecord(name, compress);
437
438 String content = getContent();
439 writeRecord(writer, SOME_URL, "text/html",
440 content.length() + 10, getBais(content));
441 writeRecord(writer, SOME_URL, "text/html",
442 content.length(), getBais(content));
443 writer.close();
444
445
446 ByteArrayOutputStream os = new ByteArrayOutputStream();
447 System.setErr(new PrintStream(os));
448
449 ARCReader r = ARCReaderFactory.get(writer.getFile());
450 r.setStrict(strict);
451 int count = iterateRecords(r);
452 assertTrue("Count wrong " + count, count == 4);
453
454
455
456 String err = os.toString();
457 assertTrue("No message " + err,
458 err.startsWith("WARNING Premature EOF before end-of-record"));
459 }
460
461 public void testGapError() throws IOException {
462 ARCWriter writer = createArcWithOneRecord("testGapError", true);
463 String content = getContent();
464
465
466 ReplayInputStream ris = new ReplayInputStream(content.getBytes(),
467 content.length(), null) {
468 public long remaining() {
469 return (super.remaining()==0) ? -1 : super.remaining();
470 }
471 };
472 String message = null;
473 try {
474 writer.write(SOME_URL, "text/html", "192.168.1.1",
475 (new Date()).getTime(), content.length(), ris);
476 } catch (IOException e) {
477 message = e.getMessage();
478 } finally {
479 IOUtils.closeQuietly(ris);
480 }
481 writer.close();
482 assertTrue("No gap when should be",
483 message != null &&
484 message.indexOf("Gap between expected and actual") >= 0);
485 }
486
487 /***
488 * Write an arc file for other tests to use.
489 * @param arcdir Directory to write to.
490 * @param compress True if file should be compressed.
491 * @return ARC written.
492 * @throws IOException
493 */
494 public static File createARCFile(File arcdir, boolean compress)
495 throws IOException {
496 File [] files = {arcdir};
497 ARCWriter writer = new ARCWriter(SERIAL_NO, Arrays.asList(files),
498 "test", compress, DEFAULT_MAX_ARC_FILE_SIZE);
499 String content = getContent();
500 writeRecord(writer, SOME_URL, "text/html", content.length(),
501 getBais(content));
502 writer.close();
503 return writer.getFile();
504 }
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521 public void testValidateMetaLine() throws Exception {
522 final String line = "http://www.aandw.net/images/walden2.png " +
523 "128.197.34.86 20060111174224 image/png 2160";
524 ARCWriter w = createARCWriter("testValidateMetaLine", true);
525 try {
526 w.validateMetaLine(line);
527 w.validateMetaLine(line + LINE_SEPARATOR);
528 w.validateMetaLine(line + "//r//n");
529 } finally {
530 w.close();
531 }
532 }
533
534 public void testArcRecordOffsetReads() throws Exception {
535 ARCRecord ar = getSingleRecord("testArcRecordInBufferStream");
536
537
538
539 final byte[] buffer = new byte[17];
540 final int maxRead = 4;
541 int totalRead = 0;
542 while (totalRead < maxRead) {
543 totalRead = totalRead
544 + ar.read(buffer, 13 + totalRead, maxRead - totalRead);
545 assertTrue(totalRead > 0);
546 }
547 }
548
549
550 public void testArchiveRecordAvailableConsistent() throws Exception {
551
552 ARCRecord record = getSingleRecord("testArchiveRecordAvailableConsistent");
553 int c = record.read();
554 while(c>=0) {
555 c = record.read();
556 }
557
558 for (int i=0; i<5; i++) {
559 assertTrue("available negative:"+record.available(), record.available()>=0);
560 assertEquals(-1, record.read());
561 }
562 }
563
564
565 public void testArchiveRecordEORConsistent() throws Exception {
566 ARCRecord record = getSingleRecord("testArchiveRecordEORConsistent");
567 this.readToEOS(record);
568
569 for (int i=0; i<5; i++) {
570 assertEquals(-1, record.read(new byte[1]));
571 }
572 }
573
574
575
576 public void testArchiveRecordMarkSupport() throws Exception {
577 ARCRecord record = getSingleRecord("testArchiveRecordMarkSupport");
578 record.setStrict(true);
579
580 InputStream stream = new BufferedInputStream(record);
581 if (stream.markSupported()) {
582 for (int i=0; i<3; i++) {
583 this.readToEOS(stream);
584 stream.mark(stream.available());
585 stream.reset();
586 }
587 stream.close();
588 }
589 }
590
591 protected void readToEOS(InputStream in) throws Exception {
592 byte [] buf = new byte[1024];
593 int read = 0;
594 while (read >= 0) {
595 read = in.read(buf);
596
597 }
598 }
599
600 protected ARCRecord getSingleRecord(String name) throws Exception {
601
602 WriterPoolMember w = createArcWithOneRecord(name, true);
603 w.close();
604
605 ARCReader r = ARCReaderFactory.get(w.getFile());
606 final Iterator<ArchiveRecord> i = r.iterator();
607
608 i.next();
609 i.hasNext();
610
611 return (ARCRecord) i.next();
612 }
613
614 }