View Javadoc

1   /* MemLongSet
2    *
3    * $Id: MemLongFPSet.java 4644 2006-09-20 22:40:21Z paul_jack $
4    *
5    * Created on Oct 19, 2003
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.util.fingerprint;
26  
27  import java.io.Serializable;
28  import java.util.logging.Logger;
29  
30  import org.archive.util.AbstractLongFPSet;
31  
32  /***
33   * Open-addressing in-memory hash set for holding primitive long fingerprints.
34   *
35   * @author Gordon Mohr
36   */
37  public class MemLongFPSet extends AbstractLongFPSet
38  implements LongFPSet, Serializable {
39      
40      
41      private static final long serialVersionUID = -4301879539092625698L;
42  
43  
44      private static Logger logger =
45          Logger.getLogger(MemLongFPSet.class.getName());
46      private static final int DEFAULT_CAPACITY_POWER_OF_TWO = 10;
47      private static final float DEFAULT_LOAD_FACTOR = 0.75f;
48      protected byte[] slots;
49      protected long[] values;
50  
51      public MemLongFPSet() {
52          this(DEFAULT_CAPACITY_POWER_OF_TWO, DEFAULT_LOAD_FACTOR);
53      }
54  
55      /***
56       * @param capacityPowerOfTwo The capacity as the exponent of a power of 2.
57       *  e.g if the capacity is <code>4</code> this means <code>2^^4</code>
58       * entries.
59       * @param loadFactor The load factor as a fraction.  This gives the amount
60       * of free space to keep in the Set.
61       */
62      public MemLongFPSet(int capacityPowerOfTwo, float loadFactor) {
63          super(capacityPowerOfTwo, loadFactor);
64          slots = new byte[1 << capacityPowerOfTwo];
65          for(int i = 0; i < (1 << capacityPowerOfTwo); i++) {
66              slots[i] = EMPTY; // flag value for unused
67          }
68          values = new long[1 << capacityPowerOfTwo];
69      }
70  
71      protected void setAt(long i, long val) {
72          slots[(int)i] = 1;
73          values[(int)i] = val;
74      }
75  
76      protected long getAt(long i) {
77          return values[(int)i];
78      }
79  
80      protected void makeSpace() {
81          grow();
82      }
83  
84      private void grow() {
85          // Catastrophic event.  Log its occurance.
86          logger.info("Doubling fingerprinting slots to "
87              + (1 << this.capacityPowerOfTwo));
88          long[] oldValues = values;
89          byte[] oldSlots = slots;
90          capacityPowerOfTwo++;
91          values = new long[1 << capacityPowerOfTwo];
92          slots = new byte[1 << capacityPowerOfTwo];
93          for(int i = 0; i < (1 << capacityPowerOfTwo); i++) {
94              slots[i]=EMPTY; // flag value for unused
95          }
96          count=0;
97          for(int i = 0; i< oldValues.length; i++) {
98              if(oldSlots[i]>=0) {
99                  add(oldValues[i]);
100             }
101         }
102     }
103 
104     protected void relocate(long val, long oldIndex, long newIndex) {
105         values[(int)newIndex] = values[(int)oldIndex];
106         slots[(int)newIndex] = slots[(int)oldIndex];
107         slots[(int)oldIndex] = EMPTY;
108     }
109 
110     protected int getSlotState(long i) {
111         return slots[(int)i];
112     }
113 
114     protected void clearAt(long index) {
115         slots[(int)index]=EMPTY;
116         values[(int)index]=0;
117     }
118 
119     public boolean quickContains(long fp) {
120         return contains(fp);
121     }
122 }