1 /* ArrayLongFPCache
2 *
3 * $Id: ArrayLongFPCache.java 3870 2005-10-06 05:01:49Z gojomo $
4 *
5 * Created on Oct 5, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 package org.archive.util.fingerprint;
26
27 /***
28 * Simple long fingerprint cache using a backing array; any long maps to
29 * one of 'smear' slots. Longs inserted should be randomly distributed,
30 *
31 * @author gojomo
32 */
33 public class ArrayLongFPCache implements LongFPSet {
34 public static final int DEFAULT_CAPACITY = 1 << 20; // 1 million, 8MB
35 public static final int DEFAULT_SMEAR = 5;
36
37 long cache[] = new long[DEFAULT_CAPACITY];
38 int smear = DEFAULT_SMEAR;
39 int count = 0;
40
41 public void setCapacity(int newCapacity) {
42 long[] oldCache = cache;
43 cache = new long[newCapacity];
44 for(int i=0;i<oldCache.length;i++) {
45 add(oldCache[i]);
46 }
47 }
48
49 /* (non-Javadoc)
50 * @see org.archive.util.fingerprint.LongFPSet#add(long)
51 */
52 public boolean add(long l) {
53 if(contains(l)) {
54 return false;
55 }
56 int index = (Math.abs((int) (l % cache.length)) + (count % smear)) % cache.length;
57 count++;
58 cache[index]=l;
59 return true;
60 }
61
62 /* (non-Javadoc)
63 * @see org.archive.util.fingerprint.LongFPSet#contains(long)
64 */
65 public boolean contains(long l) {
66 int index = Math.abs((int) (l % cache.length));
67 for(int i = index; i < index + smear; i++) {
68 if(cache[i%cache.length]==l) {
69 return true;
70 }
71 }
72 return false;
73 }
74
75 /* (non-Javadoc)
76 * @see org.archive.util.fingerprint.LongFPSet#remove(long)
77 */
78 public boolean remove(long l) {
79 int index = Math.abs((int) (l % cache.length));
80 for(int i = index; i < index + smear; i++) {
81 if(cache[i%cache.length]==l) {
82 cache[i%cache.length]=0;
83 count = Math.min(count,cache.length);
84 count--;
85 return true;
86 }
87 }
88 return false;
89 }
90
91 /* (non-Javadoc)
92 * @see org.archive.util.fingerprint.LongFPSet#count()
93 */
94 public long count() {
95 return Math.min(count,cache.length);
96 }
97
98 /* (non-Javadoc)
99 * @see org.archive.util.fingerprint.LongFPSet#quickContains(long)
100 */
101 public boolean quickContains(long fp) {
102 return contains(fp);
103 }
104
105 public int cacheLength() {
106 return cache.length;
107 }
108
109 }