1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.filter;
25
26 import java.util.logging.Logger;
27
28 import javax.management.AttributeNotFoundException;
29
30 import org.archive.crawler.datamodel.CandidateURI;
31 import org.archive.crawler.datamodel.CrawlURI;
32 import org.archive.crawler.extractor.Link;
33 import org.archive.crawler.framework.CrawlScope;
34 import org.archive.crawler.framework.Filter;
35 import org.archive.crawler.scope.ClassicScope;
36
37 /***
38 * Accepts (returns for)) for all CandidateURIs passed in
39 * with a link-hop-count greater than the max-link-hops
40 * value.
41 *
42 * @author gojomo
43 * @deprecated As of release 1.10.0. Replaced by {@link DecidingFilter} and
44 * equivalent {@link DecideRule}.
45 */
46 public class HopsFilter extends Filter {
47
48 private static final long serialVersionUID = -5943030310651023640L;
49
50 private static final Logger logger =
51 Logger.getLogger(HopsFilter.class.getName());
52
53 /***
54 * @param name
55 */
56 public HopsFilter(String name) {
57 super(name, "Hops filter *Deprecated* Use" +
58 "DecidingFilter and equivalent DecideRule instead");
59 }
60
61 int maxLinkHops = Integer.MAX_VALUE;
62 int maxTransHops = Integer.MAX_VALUE;
63
64
65
66
67 protected boolean innerAccepts(Object o) {
68 if(! (o instanceof CandidateURI)) {
69 return false;
70 }
71 String path = ((CandidateURI)o).getPathFromSeed();
72 int linkCount = 0;
73 int transCount = 0;
74 for(int i=path.length()-1;i>=0;i--) {
75 if(path.charAt(i)==Link.NAVLINK_HOP) {
76 linkCount++;
77 } else if (linkCount==0) {
78 transCount++;
79 }
80 }
81 if (o instanceof CrawlURI) {
82 CrawlURI curi = (CrawlURI) o;
83 CrawlScope scope =
84 (CrawlScope) globalSettings().getModule(CrawlScope.ATTR_NAME);
85 try {
86 maxLinkHops =
87 ((Integer) scope
88 .getAttribute(ClassicScope.ATTR_MAX_LINK_HOPS, curi))
89 .intValue();
90 maxTransHops =
91 ((Integer) scope
92 .getAttribute(ClassicScope.ATTR_MAX_TRANS_HOPS, curi))
93 .intValue();
94 } catch (AttributeNotFoundException e) {
95 logger.severe(e.getMessage());
96
97 return true;
98 }
99 }
100
101 return (linkCount > maxLinkHops)|| (transCount>maxTransHops);
102 }
103 }