View Javadoc

1   /* BdbUriUniqFilterTest
2    *
3    * $Id: BdbUriUniqFilterTest.java 4647 2006-09-22 18:39:39Z paul_jack $
4    *
5    * Created on Sep 15, 2004.
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.util;
26  import java.io.File;
27  import java.io.IOException;
28  import java.util.ArrayList;
29  import java.util.HashSet;
30  import java.util.Iterator;
31  import java.util.logging.Logger;
32  
33  import junit.framework.Test;
34  import junit.framework.TestSuite;
35  
36  import org.apache.commons.httpclient.URIException;
37  import org.archive.crawler.datamodel.CandidateURI;
38  import org.archive.crawler.datamodel.UriUniqFilter;
39  import org.archive.net.UURI;
40  import org.archive.net.UURIFactory;
41  import org.archive.util.FileUtils;
42  import org.archive.util.TmpDirTestCase;
43  
44  import com.sleepycat.je.DatabaseException;
45  
46  
47  /***
48   * Test BdbUriUniqFilter.
49   * @author stack
50   */
51  public class BdbUriUniqFilterTest extends TmpDirTestCase
52  implements UriUniqFilter.HasUriReceiver {
53      private Logger logger =
54          Logger.getLogger(BdbUriUniqFilterTest.class.getName());
55      
56      private UriUniqFilter filter = null;
57      private File bdbDir = null;
58      
59      /***
60       * Set to true if we visited received.
61       */
62      private boolean received = false;
63      
64  	protected void setUp() throws Exception {
65  		super.setUp();
66          // Remove any bdb that already exists.
67          this.bdbDir = new File(getTmpDir(), this.getClass().getName());
68          if (this.bdbDir.exists()) {
69          	FileUtils.deleteDir(bdbDir);
70          }
71  		this.filter = new BdbUriUniqFilter(bdbDir, 50);
72  		this.filter.setDestination(this);
73      }
74      
75  	protected void tearDown() throws Exception {
76  		super.tearDown();
77          ((BdbUriUniqFilter)this.filter).close();
78          // if (this.bdbDir.exists()) {
79          //    FileUtils.deleteDir(bdbDir);
80          // }
81  	}
82      
83      public void testAdding() throws URIException {
84      	this.filter.add(this.getUri(),
85              new CandidateURI(UURIFactory.getInstance(this.getUri())));
86          this.filter.addNow(this.getUri(),
87              new CandidateURI(UURIFactory.getInstance(this.getUri())));
88          this.filter.addForce(this.getUri(),
89              new CandidateURI(UURIFactory.getInstance(this.getUri())));
90          // Should only have add 'this' once.
91          assertTrue("Count is off", this.filter.count() == 1);
92      }
93      
94      public void testCreateKey() {
95          String url = "dns:archive.org";
96          long fingerprint = BdbUriUniqFilter.createKey(url);
97          assertTrue("Fingerprint wrong " + url,
98              fingerprint == 8812917769287344085L);
99          url = "http://archive.org/index.html";
100         fingerprint = BdbUriUniqFilter.createKey(url);
101         assertTrue("Fingerprint wrong " + url,
102             fingerprint == 6613237167064754714L);
103     }
104     
105     /***
106      * Verify that two URIs which gave colliding hashes, when previously
107      * the last 40bits of the composite did not sufficiently vary with certain
108      * inputs, no longer collide. 
109      */
110     public void testCreateKeyCollisions() {
111         HashSet<Long> fingerprints = new HashSet<Long>();
112         fingerprints.add(new Long(BdbUriUniqFilter
113                 .createKey("dns:mail.daps.dla.mil")));
114         fingerprints.add(new Long(BdbUriUniqFilter
115                 .createKey("dns:militaryreview.army.mil")));
116         assertEquals("colliding fingerprints",2,fingerprints.size());
117     }
118     
119     /***
120      * Time import of recovery log.
121      * REMOVE
122      * @throws IOException
123      * @throws DatabaseException
124      */
125     public void testWriting()
126     throws IOException, DatabaseException {
127         long maxcount = 1000;
128         // Look for a system property to override default max count.
129         String key = this.getClass().getName() + ".maxcount";
130         String maxcountStr = System.getProperty(key);
131         logger.info("Looking for override system property " + key);
132         if (maxcountStr != null && maxcountStr.length() > 0) {
133         	maxcount = Long.parseLong(maxcountStr);
134         }
135         runTestWriting(maxcount);
136     }
137     
138     protected void runTestWriting(long max)
139     throws DatabaseException, URIException {
140         long start = System.currentTimeMillis();
141         ArrayList<UURI> list = new ArrayList<UURI>(1000);
142         int count = 0;
143         for (; count < max; count++) {
144             UURI u = UURIFactory.getInstance("http://www" +
145                 count + ".archive.org/" + count + "/index.html");
146             this.filter.add(u.toString(), new CandidateURI(u));
147             if (count > 0 && ((count % 100) == 0)) {
148                 list.add(u);
149             }
150             if (count > 0 && ((count % 100000) == 0)) {
151                 this.logger.info("Added " + count + " in " +
152                     (System.currentTimeMillis() - start) +
153                     " misses " +
154                     ((BdbUriUniqFilter)this.filter).getCacheMisses() +
155                     " diff of misses " +
156                     ((BdbUriUniqFilter)this.filter).getLastCacheMissDiff());
157             }
158         }
159         this.logger.info("Added " + count + " in " +
160             (System.currentTimeMillis() - start));
161         
162         start = System.currentTimeMillis();
163         for (Iterator i = list.iterator(); i.hasNext();) {
164             UURI uuri = (UURI)i.next();
165             this.filter.add(uuri.toString(), new CandidateURI(uuri));
166         }
167         this.logger.info("Added random " + list.size() + " in " +
168                 (System.currentTimeMillis() - start));
169         
170         start = System.currentTimeMillis();
171         for (Iterator i = list.iterator(); i.hasNext();) {
172             UURI uuri = (UURI)i.next();
173             this.filter.add(uuri.toString(), new CandidateURI(uuri));
174         }
175         this.logger.info("Deleted random " + list.size() + " in " +
176             (System.currentTimeMillis() - start));
177         // Looks like delete doesn't work.
178         assertTrue("Count is off: " + this.filter.count(),
179             this.filter.count() == max);
180     }
181     
182     public void testNote() {
183     	this.filter.note(this.getUri());
184         assertFalse("Receiver was called", this.received);
185     }
186     
187     public void testForget() throws URIException {
188         this.filter.forget(this.getUri(),
189             new CandidateURI(UURIFactory.getInstance(getUri())));
190         assertTrue("Didn't forget", this.filter.count() == 0);
191     }
192     
193 	public void receive(CandidateURI item) {
194 		this.received = true;
195 	}
196 
197 	public String getUri() {
198 		return "http://www.archive.org";
199 	}
200     
201     /***
202      * return the suite of tests for MemQueueTest
203      *
204      * @return the suite of test
205      */
206     public static Test suite() {
207         return new TestSuite(BdbUriUniqFilterTest.class);
208     }
209 
210     public static void main(String[] args) {
211     	junit.textui.TestRunner.run(suite());
212 	}
213 }