1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.util;
25
26 import java.io.Serializable;
27
28 import org.archive.util.ArchiveUtils;
29 import org.archive.util.fingerprint.LongFPSet;
30
31 import st.ata.util.FPGenerator;
32
33 /***
34 * UriUniqFilter storing 64-bit UURI fingerprints, using an internal LongFPSet
35 * instance.
36 *
37 * The passed LongFPSet internal instance may be disk or memory based. Accesses
38 * to the underlying LongFPSet are synchronized.
39 *
40 * @author gojomo
41 */
42 public class FPUriUniqFilter extends SetBasedUriUniqFilter
43 implements Serializable {
44
45 private static final long serialVersionUID =
46 ArchiveUtils.classnameBasedUID(FPUriUniqFilter.class, 1);
47
48
49
50
51 private LongFPSet fpset;
52 private transient FPGenerator fpgen = FPGenerator.std64;
53
54 /***
55 * Create FPUriUniqFilter wrapping given long set
56 *
57 * @param fpset
58 */
59 public FPUriUniqFilter(LongFPSet fpset) {
60 this.fpset = fpset;
61 }
62
63 private long getFp(CharSequence canonical) {
64 return fpgen.fp(canonical);
65 }
66
67 protected boolean setAdd(CharSequence uri) {
68 return fpset.add(getFp(uri));
69 }
70
71 protected long setCount() {
72 return fpset.count();
73 }
74
75 protected boolean setRemove(CharSequence uri) {
76 return fpset.remove(getFp(uri));
77 }
78 }