1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 package org.archive.crawler.scope;
28
29 import java.util.ArrayList;
30 import java.util.Iterator;
31
32 import junit.framework.TestCase;
33
34 import org.apache.commons.httpclient.URIException;
35 import org.archive.net.UURI;
36 import org.archive.net.UURIFactory;
37
38 /***
39 * Test the domain scope focus filter.
40 *
41 * @author Igor Ranitovic
42 */
43 public class DomainScopeTest extends TestCase {
44
45 private ArrayList<UURI> testSeeds;
46 private ArrayList<UURI> urlsInScope;
47 private ArrayList<UURI> urlsOutOfScope;
48
49 private TestUnitDomainScope dc;
50
51 /***
52 * Since testing only focus filter overwrite all other filter to return
53 * false.
54 *
55 * Also override seedsIterator so the test seeds are used.
56 */
57 @SuppressWarnings("deprecation")
58 private class TestUnitDomainScope extends DomainScope {
59
60 private static final long serialVersionUID = 2509499903112690451L;
61
62 public TestUnitDomainScope(String name) {
63 super(name);
64 }
65
66
67
68
69 public Iterator<UURI> seedsIterator() {
70 return testSeeds.iterator();
71 }
72
73 protected boolean additionalFocusAccepts(Object o) {
74 return false;
75 }
76
77 protected boolean transitiveAccepts(Object o) {
78 return false;
79 }
80
81 protected boolean excludeAccepts(Object o) {
82 return false;
83 }
84 }
85
86 public void setUp() throws URIException {
87 testSeeds = new ArrayList<UURI>();
88 urlsInScope = new ArrayList<UURI>();
89 urlsOutOfScope = new ArrayList<UURI>();
90 dc = new TestUnitDomainScope("TESTCASE");
91
92
93 addURL(testSeeds, "http://www.a.com/");
94 addURL(testSeeds, "http://b.com/");
95 addURL(testSeeds, "http://www11.c.com");
96 addURL(testSeeds, "http://www.x.y.z.com/index.html");
97 addURL(testSeeds, "http://www.1.com/index.html");
98 addURL(testSeeds, "http://www.a_b.com/index.html");
99
100
101
102 addURL(urlsInScope, "http://www.a.com/");
103 addURL(urlsInScope, "http://www1.a.com/");
104 addURL(urlsInScope, "http://a.com/");
105 addURL(urlsInScope, "http://a.a.com/");
106
107 addURL(urlsInScope, "http://www.b.com/");
108 addURL(urlsInScope, "http://www1.b.com/");
109 addURL(urlsInScope, "http://b.com/");
110 addURL(urlsInScope, "http://b.b.com/");
111
112 addURL(urlsInScope, "http://www.c.com/");
113 addURL(urlsInScope, "http://www1.c.com/");
114 addURL(urlsInScope, "http://c.com/");
115 addURL(urlsInScope, "http://c.c.com/");
116
117 addURL(urlsInScope, "http://www.x.y.z.com/");
118 addURL(urlsInScope, "http://www1.x.y.z.com/");
119 addURL(urlsInScope, "http://x.y.z.com/");
120 addURL(urlsInScope, "http://xyz.x.y.z.com/");
121 addURL(urlsInScope, "http://1.com/index.html");
122 addURL(urlsInScope, "http://a_b.com/index.html");
123
124
125 addURL(urlsOutOfScope, "http://a.co");
126 addURL(urlsOutOfScope, "http://a.comm");
127 addURL(urlsOutOfScope, "http://aa.com");
128 addURL(urlsOutOfScope, "http://z.com");
129 addURL(urlsOutOfScope, "http://y.z.com");
130 }
131
132 public void addURL(ArrayList<UURI> list, String url) throws URIException {
133 list.add(UURIFactory.getInstance(url));
134 }
135
136 public void testInScope() throws URIException {
137 for (Iterator i = this.urlsInScope.iterator(); i.hasNext();) {
138 Object url = i.next();
139 assertTrue("Should be in domain scope: " + url, dc.accepts(url));
140 }
141 }
142
143 public void testOutOfScope() throws URIException {
144 for (Iterator i = this.urlsOutOfScope.iterator(); i.hasNext();) {
145 Object url = i.next();
146 assertFalse(
147 "Should not be in domain scope: " + url,
148 dc.accepts(url));
149 }
150 }
151 }