1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.util;
26
27 import java.io.FileNotFoundException;
28 import java.io.IOException;
29 import java.util.ArrayList;
30 import java.util.Iterator;
31 import java.util.logging.Logger;
32
33 import junit.framework.TestCase;
34
35 import org.apache.commons.httpclient.URIException;
36 import org.archive.crawler.datamodel.CandidateURI;
37 import org.archive.crawler.datamodel.UriUniqFilter;
38 import org.archive.net.UURI;
39 import org.archive.net.UURIFactory;
40 import org.archive.util.fingerprint.MemLongFPSet;
41
42
43 /***
44 * Test FPUriUniqFilter.
45 * @author stack
46 */
47 public class FPUriUniqFilterTest extends TestCase
48 implements UriUniqFilter.HasUriReceiver {
49 private Logger logger =
50 Logger.getLogger(FPUriUniqFilterTest.class.getName());
51
52 private UriUniqFilter filter = null;
53
54 /***
55 * Set to true if we visited received.
56 */
57 private boolean received = false;
58
59 protected void setUp() throws Exception {
60 super.setUp();
61
62 this.filter = new FPUriUniqFilter(new MemLongFPSet(10, 0.75f));
63 this.filter.setDestination(this);
64 }
65
66 public void testAdding() throws URIException {
67 this.filter.add(this.getUri(),
68 new CandidateURI(UURIFactory.getInstance(this.getUri())));
69 this.filter.addNow(this.getUri(),
70 new CandidateURI(UURIFactory.getInstance(this.getUri())));
71 this.filter.addForce(this.getUri(),
72 new CandidateURI(UURIFactory.getInstance(this.getUri())));
73
74 assertTrue("Count is off", this.filter.count() == 1);
75 }
76
77 /***
78 * Test inserting and removing.
79 * @throws IOException
80 * @throws FileNotFoundException
81 */
82 public void testWriting() throws FileNotFoundException, IOException {
83 long start = System.currentTimeMillis();
84 ArrayList<UURI> list = new ArrayList<UURI>(1000);
85 int count = 0;
86 final int MAX_COUNT = 1000;
87 for (; count < MAX_COUNT; count++) {
88 UURI u = UURIFactory.getInstance("http://www" +
89 count + ".archive.org/" + count + "/index.html");
90 this.filter.add(u.toString(), new CandidateURI(u));
91 if (count > 0 && ((count % 100) == 0)) {
92 list.add(u);
93 }
94 }
95 this.logger.info("Added " + count + " in " +
96 (System.currentTimeMillis() - start));
97
98 start = System.currentTimeMillis();
99 for (Iterator i = list.iterator(); i.hasNext();) {
100 UURI uuri = (UURI)i.next();
101 this.filter.add(uuri.toString(), new CandidateURI(uuri));
102 }
103 this.logger.info("Added random " + list.size() + " in " +
104 (System.currentTimeMillis() - start));
105
106 start = System.currentTimeMillis();
107 for (Iterator i = list.iterator(); i.hasNext();) {
108 UURI uuri = (UURI)i.next();
109 this.filter.add(uuri.toString(), new CandidateURI(uuri));
110 }
111 this.logger.info("Deleted random " + list.size() + " in " +
112 (System.currentTimeMillis() - start));
113
114 assertTrue("Count is off: " + this.filter.count(),
115 this.filter.count() == MAX_COUNT);
116 }
117
118 public void testNote() {
119 this.filter.note(this.getUri());
120 assertFalse("Receiver was called", this.received);
121 }
122
123 public void testForget() throws URIException {
124 this.filter.forget(this.getUri(),
125 new CandidateURI(UURIFactory.getInstance(this.getUri())));
126 assertTrue("Didn't forget", this.filter.count() == 0);
127 }
128
129 public void receive(CandidateURI item) {
130 this.received = true;
131 }
132
133 public String getUri() {
134 return "http://www.archive.org";
135 }
136 }