View Javadoc

1   /* ReplayCharSequenceTest
2    *
3    * Created on Dec 26, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.io;
24  
25  import java.io.File;
26  import java.io.IOException;
27  import java.util.Date;
28  import java.util.logging.Logger;
29  
30  import org.archive.util.FileUtils;
31  import org.archive.util.TmpDirTestCase;
32  
33  /***
34   * Test ReplayCharSequences.
35   *
36   * @author stack, gojomo
37   * @version $Revision: 5848 $, $Date: 2008-06-28 01:20:38 +0000 (Sat, 28 Jun 2008) $
38   */
39  public class ReplayCharSequenceTest extends TmpDirTestCase
40  {
41      /***
42       * Logger.
43       */
44      private static Logger logger =
45          Logger.getLogger("org.archive.io.ReplayCharSequenceFactoryTest");
46  
47  
48      private static final int SEQUENCE_LENGTH = 127;
49      private static final int MULTIPLIER = 3;
50      private static final int BUFFER_SIZE = SEQUENCE_LENGTH * MULTIPLIER;
51      private static final int INCREMENT = 1;
52  
53      /***
54       * Buffer of regular content.
55       */
56      private byte [] regularBuffer = null;
57  
58      /*
59       * @see TestCase#setUp()
60       */
61      protected void setUp() throws Exception
62      {
63          super.setUp();
64          this.regularBuffer =
65              fillBufferWithRegularContent(new byte [BUFFER_SIZE]);
66      }
67  
68      public void testShiftjis() throws IOException {
69  
70          // Here's the bytes for the JIS encoding of the Japanese form of Nihongo
71          byte[] bytes_nihongo = {
72              (byte) 0x1B, (byte) 0x24, (byte) 0x42, (byte) 0x46,
73              (byte) 0x7C, (byte) 0x4B, (byte) 0x5C, (byte) 0x38,
74              (byte) 0x6C, (byte) 0x1B, (byte) 0x28, (byte) 0x42,
75              (byte) 0x1B, (byte) 0x28, (byte) 0x42 };
76          final String ENCODING = "SJIS";
77          // Here is nihongo converted to JVM encoding.
78          String nihongo = new String(bytes_nihongo, ENCODING);
79  
80          RecordingOutputStream ros = writeTestStream(
81                  bytes_nihongo,MULTIPLIER,
82                  "testShiftjis",MULTIPLIER);
83          // TODO: check for existence of overflow file?
84          ReplayCharSequence rcs = ros.getReplayCharSequence(ENCODING);
85              
86          // Now check that start of the rcs comes back in as nihongo string.
87          String rcsStr = rcs.subSequence(0, nihongo.length()).toString();
88          assertTrue("Nihongo " + nihongo + " does not equal converted string" +
89                  " from rcs " + rcsStr,
90              nihongo.equals(rcsStr));
91          // And assert next string is also properly nihongo.
92          if (rcs.length() >= (nihongo.length() * 2)) {
93              rcsStr = rcs.subSequence(nihongo.length(),
94                  nihongo.length() + nihongo.length()).toString();
95              assertTrue("Nihongo " + nihongo + " does not equal converted " +
96                  " string from rcs (2nd time)" + rcsStr,
97                  nihongo.equals(rcsStr));
98          }
99      }
100 
101     public void testGetReplayCharSequenceByteZeroOffset() throws IOException {
102 
103         RecordingOutputStream ros = writeTestStream(
104                 regularBuffer,MULTIPLIER,
105                 "testGetReplayCharSequenceByteZeroOffset",MULTIPLIER);
106         ReplayCharSequence rcs = ros.getReplayCharSequence();
107 
108         for (int i = 0; i < MULTIPLIER; i++) {
109             accessingCharacters(rcs);
110         }
111     }
112 
113     public void testGetReplayCharSequenceByteOffset() throws IOException {
114 
115         RecordingOutputStream ros = writeTestStream(
116                 regularBuffer,MULTIPLIER,
117                 "testGetReplayCharSequenceByteOffset",MULTIPLIER);
118         ReplayCharSequence rcs = ros.getReplayCharSequence(null,SEQUENCE_LENGTH);
119 
120         for (int i = 0; i < MULTIPLIER; i++) {
121             accessingCharacters(rcs);
122         }
123     }
124 
125     public void testGetReplayCharSequenceMultiByteZeroOffset()
126         throws IOException {
127 
128         RecordingOutputStream ros = writeTestStream(
129                 regularBuffer,MULTIPLIER,
130                 "testGetReplayCharSequenceMultiByteZeroOffset",MULTIPLIER);
131         ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
132 
133         for (int i = 0; i < MULTIPLIER; i++) {
134             accessingCharacters(rcs);
135         }
136     }
137 
138     public void testGetReplayCharSequenceMultiByteOffset() throws IOException {
139 
140         RecordingOutputStream ros = writeTestStream(
141                 regularBuffer,MULTIPLIER,
142                 "testGetReplayCharSequenceMultiByteOffset",MULTIPLIER);
143         ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8", SEQUENCE_LENGTH);
144 
145         try {
146             for (int i = 0; i < MULTIPLIER; i++) {
147                 accessingCharacters(rcs);
148             }
149         } finally {
150             rcs.close();
151         }
152     }
153     
154     public void testReplayCharSequenceByteToString() throws IOException {
155         String fileContent = "Some file content";
156         byte [] buffer = fileContent.getBytes();
157         RecordingOutputStream ros = writeTestStream(
158                 buffer,1,
159                 "testReplayCharSequenceByteToString.txt",0);
160         ReplayCharSequence rcs = ros.getReplayCharSequence();
161         String result = rcs.toString();
162         assertEquals("Strings don't match",result,fileContent);
163     }
164 
165     private String toHexString(String str)
166     {
167         if (str != null) {
168             StringBuilder buf = new StringBuilder("{ ");
169             buf.append(Integer.toString(str.charAt(0), 16));
170             for (int i = 1; i < str.length(); i++) {
171                 buf.append(", ");
172                 buf.append(Integer.toString(str.charAt(i), 16));
173             }
174             buf.append(" }");
175             return buf.toString();
176         }
177         else 
178             return "null";
179     }
180     
181     public void testSingleByteEncodings() throws IOException {
182         byte[] bytes = {
183             (byte) 0x61, (byte) 0x62, (byte) 0x63, (byte) 0x64,
184             (byte) 0x7d, (byte) 0x7e, (byte) 0x7f, (byte) 0x80,
185             (byte) 0x81, (byte) 0x82, (byte) 0x83, (byte) 0x84,
186             (byte) 0xfc, (byte) 0xfd, (byte) 0xfe, (byte) 0xff };
187 
188         String latin1String = new String(bytes, "latin1");
189         RecordingOutputStream ros = writeTestStream(
190                 bytes, 1, "testSingleByteEncodings-latin1.txt", 0);
191         ReplayCharSequence rcs = ros.getReplayCharSequence("latin1");
192         String result = rcs.toString();
193         logger.info("latin1[0] " + toHexString(latin1String));
194         logger.info("latin1[1] " + toHexString(result));
195         assertEquals("latin1 strings don't match", result, latin1String);
196         
197         String w1252String = new String(bytes, "windows-1252");
198         ros = writeTestStream(
199                 bytes, 1, "testSingleByteEncodings-windows-1252.txt", 0);
200         rcs = ros.getReplayCharSequence("windows-1252");
201         result = rcs.toString();
202         logger.info("windows-1252[0] " + toHexString(w1252String));
203         logger.info("windows-1252[1] " + toHexString(result));
204         assertEquals("windows-1252 strings don't match", result, w1252String);
205 
206         String asciiString = new String(bytes, "ascii");
207         ros = writeTestStream(
208                 bytes, 1, "testSingleByteEncodings-ascii.txt", 0);
209         rcs = ros.getReplayCharSequence("ascii");
210         result = rcs.toString();
211         logger.info("ascii[0] " + toHexString(asciiString));
212         logger.info("ascii[1] " + toHexString(result));
213         assertEquals("ascii strings don't match", result, asciiString);
214     }
215     
216     public void testReplayCharSequenceByteToStringOverflow() throws IOException {
217         String fileContent = "Some file content. ";
218         byte [] buffer = fileContent.getBytes();
219         RecordingOutputStream ros = writeTestStream(
220                 buffer,1,
221                 "testReplayCharSequenceByteToString.txt",1);
222         String expectedContent = fileContent+fileContent;
223         ReplayCharSequence rcs = ros.getReplayCharSequence();
224         String result = rcs.toString();
225         assertEquals("Strings don't match", expectedContent, result);
226     }
227     
228     public void testReplayCharSequenceByteToStringMulti() throws IOException {
229         String fileContent = "Some file content";
230         byte [] buffer = fileContent.getBytes("UTF-8");
231         final int MULTIPLICAND = 10;
232         StringBuilder sb =
233             new StringBuilder(MULTIPLICAND * fileContent.length());
234         for (int i = 0; i < MULTIPLICAND; i++) {
235             sb.append(fileContent);
236         }
237         String expectedResult = sb.toString();
238         RecordingOutputStream ros = writeTestStream(
239                 buffer,1,
240                 "testReplayCharSequenceByteToStringMulti.txt",MULTIPLICAND-1);
241         for (int i = 0; i < 3; i++) {
242             ReplayCharSequence rcs = ros.getReplayCharSequence("UTF-8");
243             String result = rcs.toString();
244             assertEquals("Strings don't match", result, expectedResult);
245             rcs.close();
246         }
247     }
248     
249     /***
250      * Accessing characters test.
251      *
252      * Checks that characters in the rcs are in sequence.
253      *
254      * @param rcs The ReplayCharSequence to try out.
255      */
256     private void accessingCharacters(CharSequence rcs) {
257         long timestamp = (new Date()).getTime();
258         int seeks = 0;
259         for (int i = (INCREMENT * 2); (i + INCREMENT) < rcs.length();
260                 i += INCREMENT) {
261             checkCharacter(rcs, i);
262             seeks++;
263             for (int j = i - INCREMENT; j < i; j++) {
264                 checkCharacter(rcs, j);
265                 seeks++;
266             }
267         }
268         // Note that printing out below breaks cruisecontrols drawing
269         // of the xml unit test results because it outputs disallowed
270         // xml characters.
271         logger.fine(rcs + " seeks count " + seeks + " in " +
272             ((new Date().getTime()) - timestamp) + " milliseconds.");
273     }
274 
275     /***
276      * Check the character read.
277      *
278      * Throws assertion if not expected result.
279      *
280      * @param rcs ReplayCharSequence to read from.
281      * @param i Character offset.
282      */
283     private void checkCharacter(CharSequence rcs, int i) {
284         int c = rcs.charAt(i);
285         assertTrue("Character " + Integer.toString(c) + " at offset " + i +
286             " unexpected.", (c % SEQUENCE_LENGTH) == (i % SEQUENCE_LENGTH));
287     }
288 
289     /***
290      * @param baseName
291      * @return RecordingOutputStream
292      * @throws IOException
293      */
294     private RecordingOutputStream writeTestStream(byte[] content, 
295             int memReps, String baseName, int fileReps) throws IOException {
296         String backingFilename = FileUtils.maybeRelative(getTmpDir(),baseName).getAbsolutePath();
297         RecordingOutputStream ros = new RecordingOutputStream(
298                 content.length * memReps,
299                 backingFilename);
300         ros.open();
301         for(int i = 0; i < (memReps+fileReps); i++) {
302             // fill buffer (repeat MULTIPLIER times) and 
303             // overflow to disk (also MULTIPLIER times)
304             ros.write(content);
305         }
306         ros.close();
307         return ros; 
308     }
309 
310 
311     /***
312      * Fill a buffer w/ regular progression of single-byte 
313      * (and <= 127) characters.
314      * @param buffer Buffer to fill.
315      * @return The buffer we filled.
316      */
317     private byte [] fillBufferWithRegularContent(byte [] buffer) {
318         int index = 0;
319         for (int i = 0; i < buffer.length; i++) {
320             buffer[i] = (byte) (index & 0x00ff);
321             index++;
322             if (index >= SEQUENCE_LENGTH) {
323                 // Reset the index.
324                 index = 0;
325             }
326         }
327         return buffer;
328     }
329 
330     public void testCheckParameters()
331     {
332         // TODO.
333     }
334 }