View Javadoc

1   /* FPUriUniqFilterTest
2    *
3    * $Id: FPUriUniqFilterTest.java 4647 2006-09-22 18:39:39Z paul_jack $
4    *
5    * Created on Sep 15, 2004.
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.util;
26  
27  import java.io.FileNotFoundException;
28  import java.io.IOException;
29  import java.util.ArrayList;
30  import java.util.Iterator;
31  import java.util.logging.Logger;
32  
33  import junit.framework.TestCase;
34  
35  import org.apache.commons.httpclient.URIException;
36  import org.archive.crawler.datamodel.CandidateURI;
37  import org.archive.crawler.datamodel.UriUniqFilter;
38  import org.archive.net.UURI;
39  import org.archive.net.UURIFactory;
40  import org.archive.util.fingerprint.MemLongFPSet;
41  
42  
43  /***
44   * Test FPUriUniqFilter.
45   * @author stack
46   */
47  public class FPUriUniqFilterTest extends TestCase
48  implements UriUniqFilter.HasUriReceiver {
49      private Logger logger =
50          Logger.getLogger(FPUriUniqFilterTest.class.getName());
51  
52      private UriUniqFilter filter = null;
53      
54      /***
55       * Set to true if we visited received.
56       */
57      private boolean received = false;
58      
59  	protected void setUp() throws Exception {
60  		super.setUp();
61          // 17 makes a MemLongFPSet of one meg of longs (64megs).
62  		this.filter = new FPUriUniqFilter(new MemLongFPSet(10, 0.75f));
63  		this.filter.setDestination(this);
64      }
65      
66      public void testAdding() throws URIException {
67          this.filter.add(this.getUri(),
68              new CandidateURI(UURIFactory.getInstance(this.getUri())));
69          this.filter.addNow(this.getUri(),
70              new CandidateURI(UURIFactory.getInstance(this.getUri())));
71          this.filter.addForce(this.getUri(),
72              new CandidateURI(UURIFactory.getInstance(this.getUri())));
73          // Should only have add 'this' once.
74          assertTrue("Count is off", this.filter.count() == 1);
75      }
76      
77      /***
78       * Test inserting and removing.
79       * @throws IOException
80       * @throws FileNotFoundException
81       */
82      public void testWriting() throws FileNotFoundException, IOException {
83          long start = System.currentTimeMillis();
84          ArrayList<UURI> list = new ArrayList<UURI>(1000);
85          int count = 0;
86          final int MAX_COUNT = 1000;
87          for (; count < MAX_COUNT; count++) {
88          	UURI u = UURIFactory.getInstance("http://www" +
89          			count + ".archive.org/" + count + "/index.html");
90          	this.filter.add(u.toString(), new CandidateURI(u));
91          	if (count > 0 && ((count % 100) == 0)) {
92          		list.add(u);
93          	}
94          }
95          this.logger.info("Added " + count + " in " +
96          		(System.currentTimeMillis() - start));
97          
98          start = System.currentTimeMillis();
99          for (Iterator i = list.iterator(); i.hasNext();) {
100             UURI uuri = (UURI)i.next();
101             this.filter.add(uuri.toString(), new CandidateURI(uuri));
102         }
103         this.logger.info("Added random " + list.size() + " in " +
104         		(System.currentTimeMillis() - start));
105         
106         start = System.currentTimeMillis();
107         for (Iterator i = list.iterator(); i.hasNext();) {
108             UURI uuri = (UURI)i.next();
109             this.filter.add(uuri.toString(), new CandidateURI(uuri));
110         }
111         this.logger.info("Deleted random " + list.size() + " in " +
112             (System.currentTimeMillis() - start));
113         // Looks like delete doesn't work.
114         assertTrue("Count is off: " + this.filter.count(),
115             this.filter.count() == MAX_COUNT);
116     }
117     
118     public void testNote() {
119     	this.filter.note(this.getUri());
120         assertFalse("Receiver was called", this.received);
121     }
122     
123     public void testForget() throws URIException {
124         this.filter.forget(this.getUri(),
125                 new CandidateURI(UURIFactory.getInstance(this.getUri())));
126         assertTrue("Didn't forget", this.filter.count() == 0);
127     }
128     
129 	public void receive(CandidateURI item) {
130 		this.received = true;
131 	}
132 
133 	public String getUri() {
134 		return "http://www.archive.org";
135 	}
136 }