View Javadoc

1   /* Copyright (C) 2003 Internet Archive.
2    *
3    * This file is part of the Heritrix web crawler (crawler.archive.org).
4    *
5    * Heritrix is free software; you can redistribute it and/or modify
6    * it under the terms of the GNU Lesser Public License as published by
7    * the Free Software Foundation; either version 2.1 of the License, or
8    * any later version.
9    *
10   * Heritrix is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU Lesser Public License for more details.
14   *
15   * You should have received a copy of the GNU Lesser Public License
16   * along with Heritrix; if not, write to the Free Software
17   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18   *
19   * FPUriUniqFilter.java
20   * Created on Oct 1, 2003
21   *
22   * $Header$
23   */
24  package org.archive.crawler.util;
25  
26  import java.io.Serializable;
27  
28  import org.archive.util.ArchiveUtils;
29  import org.archive.util.fingerprint.LongFPSet;
30  
31  import st.ata.util.FPGenerator;
32  
33  /***
34   * UriUniqFilter storing 64-bit UURI fingerprints, using an internal LongFPSet
35   * instance. 
36   * 
37   * The passed LongFPSet internal instance may be disk or memory based. Accesses
38   * to the underlying LongFPSet are synchronized.
39   *
40   * @author gojomo
41   */
42  public class FPUriUniqFilter extends SetBasedUriUniqFilter 
43  implements Serializable {
44      // Be robust against trivial implementation changes
45      private static final long serialVersionUID =
46          ArchiveUtils.classnameBasedUID(FPUriUniqFilter.class, 1);
47      
48  //    private static Logger logger =
49  //        Logger.getLogger(FPUriUniqFilter.class.getName());
50      
51      private LongFPSet fpset;
52      private transient FPGenerator fpgen = FPGenerator.std64;
53      
54      /***
55       * Create FPUriUniqFilter wrapping given long set
56       * 
57       * @param fpset
58       */
59      public FPUriUniqFilter(LongFPSet fpset) {
60          this.fpset = fpset;
61      }
62      
63      private long getFp(CharSequence canonical) {
64          return fpgen.fp(canonical);
65      }
66  
67      protected boolean setAdd(CharSequence uri) {
68          return fpset.add(getFp(uri));
69      }
70  
71      protected long setCount() {
72          return fpset.count();
73      }
74  
75      protected boolean setRemove(CharSequence uri) {
76          return fpset.remove(getFp(uri));
77      }
78  }