View Javadoc

1   /* ArrayLongFPCache
2   *
3   * $Id: ArrayLongFPCache.java 3870 2005-10-06 05:01:49Z gojomo $
4   *
5   * Created on Oct 5, 2005
6   *
7   * Copyright (C) 2005 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */ 
25  package org.archive.util.fingerprint;
26  
27  /***
28   * Simple long fingerprint cache using a backing array; any long maps to 
29   * one of 'smear' slots. Longs inserted should be randomly distributed, 
30   * 
31   * @author gojomo
32   */
33  public class ArrayLongFPCache implements LongFPSet {
34      public static final int DEFAULT_CAPACITY = 1 << 20; // 1 million, 8MB
35      public static final int DEFAULT_SMEAR = 5; 
36  
37      long cache[] = new long[DEFAULT_CAPACITY];
38      int smear = DEFAULT_SMEAR;
39      int count = 0;
40      
41      public void setCapacity(int newCapacity) {
42          long[] oldCache = cache;
43          cache = new long[newCapacity];
44          for(int i=0;i<oldCache.length;i++) {
45              add(oldCache[i]);
46          }
47      }
48      
49      /* (non-Javadoc)
50       * @see org.archive.util.fingerprint.LongFPSet#add(long)
51       */
52      public boolean add(long l) {
53          if(contains(l)) {
54              return false; 
55          }
56          int index = (Math.abs((int) (l % cache.length)) + (count % smear)) % cache.length;
57          count++;
58          cache[index]=l;
59          return true;
60      }
61  
62      /* (non-Javadoc)
63       * @see org.archive.util.fingerprint.LongFPSet#contains(long)
64       */
65      public boolean contains(long l) {
66          int index = Math.abs((int) (l % cache.length));
67          for(int i = index; i < index + smear; i++) {
68              if(cache[i%cache.length]==l) {
69                  return true;
70              }
71          }
72          return false;
73      }
74  
75      /* (non-Javadoc)
76       * @see org.archive.util.fingerprint.LongFPSet#remove(long)
77       */
78      public boolean remove(long l) {
79          int index = Math.abs((int) (l % cache.length));
80          for(int i = index; i < index + smear; i++) {
81              if(cache[i%cache.length]==l) {
82                  cache[i%cache.length]=0;
83                  count = Math.min(count,cache.length);
84                  count--;
85                  return true;
86              }
87          }
88          return false;
89      }
90  
91      /* (non-Javadoc)
92       * @see org.archive.util.fingerprint.LongFPSet#count()
93       */
94      public long count() {
95          return Math.min(count,cache.length);
96      }
97  
98      /* (non-Javadoc)
99       * @see org.archive.util.fingerprint.LongFPSet#quickContains(long)
100      */
101     public boolean quickContains(long fp) {
102         return contains(fp);
103     }
104     
105     public int cacheLength() {
106         return cache.length;
107     }
108 
109 }