1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.util;
26 import java.io.File;
27 import java.io.IOException;
28 import java.util.ArrayList;
29 import java.util.HashSet;
30 import java.util.Iterator;
31 import java.util.logging.Logger;
32
33 import junit.framework.Test;
34 import junit.framework.TestSuite;
35
36 import org.apache.commons.httpclient.URIException;
37 import org.archive.crawler.datamodel.CandidateURI;
38 import org.archive.crawler.datamodel.UriUniqFilter;
39 import org.archive.net.UURI;
40 import org.archive.net.UURIFactory;
41 import org.archive.util.FileUtils;
42 import org.archive.util.TmpDirTestCase;
43
44 import com.sleepycat.je.DatabaseException;
45
46
47 /***
48 * Test BdbUriUniqFilter.
49 * @author stack
50 */
51 public class BdbUriUniqFilterTest extends TmpDirTestCase
52 implements UriUniqFilter.HasUriReceiver {
53 private Logger logger =
54 Logger.getLogger(BdbUriUniqFilterTest.class.getName());
55
56 private UriUniqFilter filter = null;
57 private File bdbDir = null;
58
59 /***
60 * Set to true if we visited received.
61 */
62 private boolean received = false;
63
64 protected void setUp() throws Exception {
65 super.setUp();
66
67 this.bdbDir = new File(getTmpDir(), this.getClass().getName());
68 if (this.bdbDir.exists()) {
69 FileUtils.deleteDir(bdbDir);
70 }
71 this.filter = new BdbUriUniqFilter(bdbDir, 50);
72 this.filter.setDestination(this);
73 }
74
75 protected void tearDown() throws Exception {
76 super.tearDown();
77 ((BdbUriUniqFilter)this.filter).close();
78
79
80
81 }
82
83 public void testAdding() throws URIException {
84 this.filter.add(this.getUri(),
85 new CandidateURI(UURIFactory.getInstance(this.getUri())));
86 this.filter.addNow(this.getUri(),
87 new CandidateURI(UURIFactory.getInstance(this.getUri())));
88 this.filter.addForce(this.getUri(),
89 new CandidateURI(UURIFactory.getInstance(this.getUri())));
90
91 assertTrue("Count is off", this.filter.count() == 1);
92 }
93
94 public void testCreateKey() {
95 String url = "dns:archive.org";
96 long fingerprint = BdbUriUniqFilter.createKey(url);
97 assertTrue("Fingerprint wrong " + url,
98 fingerprint == 8812917769287344085L);
99 url = "http://archive.org/index.html";
100 fingerprint = BdbUriUniqFilter.createKey(url);
101 assertTrue("Fingerprint wrong " + url,
102 fingerprint == 6613237167064754714L);
103 }
104
105 /***
106 * Verify that two URIs which gave colliding hashes, when previously
107 * the last 40bits of the composite did not sufficiently vary with certain
108 * inputs, no longer collide.
109 */
110 public void testCreateKeyCollisions() {
111 HashSet<Long> fingerprints = new HashSet<Long>();
112 fingerprints.add(new Long(BdbUriUniqFilter
113 .createKey("dns:mail.daps.dla.mil")));
114 fingerprints.add(new Long(BdbUriUniqFilter
115 .createKey("dns:militaryreview.army.mil")));
116 assertEquals("colliding fingerprints",2,fingerprints.size());
117 }
118
119 /***
120 * Time import of recovery log.
121 * REMOVE
122 * @throws IOException
123 * @throws DatabaseException
124 */
125 public void testWriting()
126 throws IOException, DatabaseException {
127 long maxcount = 1000;
128
129 String key = this.getClass().getName() + ".maxcount";
130 String maxcountStr = System.getProperty(key);
131 logger.info("Looking for override system property " + key);
132 if (maxcountStr != null && maxcountStr.length() > 0) {
133 maxcount = Long.parseLong(maxcountStr);
134 }
135 runTestWriting(maxcount);
136 }
137
138 protected void runTestWriting(long max)
139 throws DatabaseException, URIException {
140 long start = System.currentTimeMillis();
141 ArrayList<UURI> list = new ArrayList<UURI>(1000);
142 int count = 0;
143 for (; count < max; count++) {
144 UURI u = UURIFactory.getInstance("http://www" +
145 count + ".archive.org/" + count + "/index.html");
146 this.filter.add(u.toString(), new CandidateURI(u));
147 if (count > 0 && ((count % 100) == 0)) {
148 list.add(u);
149 }
150 if (count > 0 && ((count % 100000) == 0)) {
151 this.logger.info("Added " + count + " in " +
152 (System.currentTimeMillis() - start) +
153 " misses " +
154 ((BdbUriUniqFilter)this.filter).getCacheMisses() +
155 " diff of misses " +
156 ((BdbUriUniqFilter)this.filter).getLastCacheMissDiff());
157 }
158 }
159 this.logger.info("Added " + count + " in " +
160 (System.currentTimeMillis() - start));
161
162 start = System.currentTimeMillis();
163 for (Iterator i = list.iterator(); i.hasNext();) {
164 UURI uuri = (UURI)i.next();
165 this.filter.add(uuri.toString(), new CandidateURI(uuri));
166 }
167 this.logger.info("Added random " + list.size() + " in " +
168 (System.currentTimeMillis() - start));
169
170 start = System.currentTimeMillis();
171 for (Iterator i = list.iterator(); i.hasNext();) {
172 UURI uuri = (UURI)i.next();
173 this.filter.add(uuri.toString(), new CandidateURI(uuri));
174 }
175 this.logger.info("Deleted random " + list.size() + " in " +
176 (System.currentTimeMillis() - start));
177
178 assertTrue("Count is off: " + this.filter.count(),
179 this.filter.count() == max);
180 }
181
182 public void testNote() {
183 this.filter.note(this.getUri());
184 assertFalse("Receiver was called", this.received);
185 }
186
187 public void testForget() throws URIException {
188 this.filter.forget(this.getUri(),
189 new CandidateURI(UURIFactory.getInstance(getUri())));
190 assertTrue("Didn't forget", this.filter.count() == 0);
191 }
192
193 public void receive(CandidateURI item) {
194 this.received = true;
195 }
196
197 public String getUri() {
198 return "http://www.archive.org";
199 }
200
201 /***
202 * return the suite of tests for MemQueueTest
203 *
204 * @return the suite of test
205 */
206 public static Test suite() {
207 return new TestSuite(BdbUriUniqFilterTest.class);
208 }
209
210 public static void main(String[] args) {
211 junit.textui.TestRunner.run(suite());
212 }
213 }