1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.settings.refinements;
26
27 import org.archive.net.UURI;
28 import org.archive.util.TextUtils;
29
30
31 /***
32 * A refinement criteria that test if a URI matches a regular expression.
33 *
34 * @author John Erik Halse
35 */
36 public class RegularExpressionCriteria implements Criteria {
37 private String regexp = "";
38
39 /***
40 * Create a new instance of RegularExpressionCriteria.
41 */
42 public RegularExpressionCriteria() {
43 super();
44 }
45
46 /***
47 * Create a new instance of RegularExpressionCriteria initializing it with
48 * a regular expression.
49 *
50 * @param regexp the regular expression for this criteria.
51 */
52 public RegularExpressionCriteria(String regexp) {
53 setRegexp(regexp);
54 }
55
56
57
58
59 public boolean isWithinRefinementBounds(UURI uri) {
60 return (uri == null || uri == null)?
61 false: TextUtils.matches(regexp, uri.toString());
62 }
63
64 /***
65 * Get the regular expression to be matched against a URI.
66 *
67 * @return Returns the regexp.
68 */
69 public String getRegexp() {
70 return regexp;
71 }
72 /***
73 * Set the regular expression to be matched against a URI.
74 *
75 * @param regexp The regexp to set.
76 */
77 public void setRegexp(String regexp) {
78 this.regexp = regexp;
79 }
80
81
82
83
84 public String getName() {
85 return "Regular expression criteria";
86 }
87
88
89
90
91 public String getDescription() {
92 return "Accept URIs that match the following regular expression: "
93 + getRegexp();
94 }
95 }