1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.deciderules;
26
27
28 import org.archive.util.SurtPrefixSet;
29
30
31 /***
32 * Rule applies configured decision to any URIs that
33 * are on one of the hosts in the configured set of
34 * hosts, filled from the seed set.
35 *
36 * @author gojomo
37 */
38 public class OnHostsDecideRule extends SurtPrefixedDecideRule {
39
40 private static final long serialVersionUID = -7566348189389792625L;
41
42
43
44 /***
45 * Usual constructor.
46 * @param name
47 */
48 public OnHostsDecideRule(String name) {
49 super(name);
50 setDescription(
51 "OnHostsDecideRule. Makes the configured decision " +
52 "for any URI which is on one of the hosts in the " +
53 "configured set of hostnames (derived from the seed" +
54 "list).");
55
56 getElementFromDefinition(ATTR_SEEDS_AS_SURT_PREFIXES).setTransient(true);
57 getElementFromDefinition(ATTR_SURTS_SOURCE_FILE).setTransient(true);
58
59
60 }
61
62 /***
63 * Patch the SURT prefix set so that it only includes host-enforcing prefixes
64 *
65 * @see org.archive.crawler.deciderules.SurtPrefixedDecideRule#readPrefixes()
66 */
67 protected void readPrefixes() {
68 buildSurtPrefixSet();
69 surtPrefixes.convertAllPrefixesToHosts();
70 dumpSurtPrefixSet();
71 }
72
73 protected String prefixFrom(String uri) {
74 return SurtPrefixSet.convertPrefixToHost(super.prefixFrom(uri));
75 }
76 }