1 package org.archive.crawler.scope;
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 import java.io.File;
29 import java.io.FileWriter;
30 import java.io.IOException;
31 import java.io.PrintWriter;
32 import java.util.Comparator;
33 import java.util.Iterator;
34 import java.util.Set;
35 import java.util.TreeSet;
36
37 import org.apache.commons.httpclient.URIException;
38 import org.archive.crawler.datamodel.CrawlURI;
39 import org.archive.net.UURI;
40 import org.archive.net.UURIFactory;
41 import org.archive.util.TmpDirTestCase;
42
43
44 /***
45 * Test {@link SeedCachingScope}.
46 * @author stack gojomo
47 * @version $Revision: 4651 $, $Date: 2006-09-25 18:31:13 +0000 (Mon, 25 Sep 2006) $
48 */
49 public class SeedCachingScopeTest extends TmpDirTestCase {
50 /***
51 * Constrained SeedCachingScope subclass for testing
52 *
53 * @author gojomo
54 */
55 private class UnitTestSeedCachingScope extends SeedCachingScope {
56
57 private static final long serialVersionUID = -1651873833038665447L;
58
59 private File seedsfile;
60
61 public UnitTestSeedCachingScope(File seedsfile) {
62 super("test");
63 this.seedsfile = seedsfile;
64 }
65
66 public File getSeedfile() {
67 return seedsfile;
68 }
69 }
70
71 private static Set<UURI> seeds = null;
72
73 /***
74 * Comparator for treeset of uuris.
75 */
76 private static final Comparator<UURI> CMP = new Comparator<UURI> () {
77 public int compare(UURI o1, UURI o2) {
78 int result = -1;
79 if (o1 == null && o1 == null){
80 result = 0;
81 } else if (o1 == null) {
82 result = -1;
83 } else if (o2 == null) {
84 result = 1;
85 } else {
86 String s1 = o1.toString();
87 String s2 = o2.toString();
88 result = s1.compareTo(s2);
89 result = (result < 0)? result = -1:
90 (result > 0)? result = 1: 0;
91 }
92 return result;
93 }
94 };
95
96
97 /***
98 * Seed file reference.
99 */
100 private File seedsfile;
101
102
103
104
105
106 protected void setUp() throws Exception {
107 super.setUp();
108
109
110 SeedCachingScopeTest.seeds = new TreeSet<UURI>(SeedCachingScopeTest.CMP);
111 String [] uris = {"mailto:www.google.com",
112 "http://www.port.com:80/etc/motd2",
113 "http://a:b@userinfo.com/etc/motd2",
114 "news:www.google.com",
115 "http://www.google.com",
116 "https://www.google.com",
117 "gopher://www.google.com",
118 "news://www.google.com",
119 "rss://www.google.com",
120 "telnet://www.google.com",
121 "ftp://myname@example.com/etc/motd",
122 "ftp://example.com/etc/motd2"
123 };
124 for (int i = 0; i < uris.length; i++) {
125 SeedCachingScopeTest.seeds.add(UURIFactory.getInstance(uris[i]));
126 }
127
128
129 this.seedsfile = new File(getTmpDir(),
130 SeedCachingScopeTest.class.getName() + ".seedfile");
131 PrintWriter writer = new PrintWriter(new FileWriter(this.seedsfile));
132 for (int i = 0; i < uris.length; i++) {
133 writer.println(uris[i]);
134 }
135 writer.close();
136 }
137
138
139
140
141
142 protected void tearDown() throws Exception {
143 super.tearDown();
144 if (this.seedsfile.exists()) {
145 this.seedsfile.delete();
146 }
147 }
148
149 public void testGeneral() throws URIException {
150
151 SeedCachingScope sl = checkContent(SeedCachingScopeTest.seeds);
152
153 final CrawlURI curi = new CrawlURI(UURIFactory.getInstance("http://one.two.three"));
154 sl.addSeed(curi);
155 Set<UURI> set = new TreeSet<UURI>(SeedCachingScopeTest.CMP);
156 set.addAll(SeedCachingScopeTest.seeds);
157 set.add(curi.getUURI());
158 checkContent(sl, set);
159 }
160
161 public void testNoScheme() throws IOException {
162 final String NOSCHEME = "x.y.z";
163 FileWriter fw = new FileWriter(this.seedsfile, true);
164
165 fw.write("\n");
166 fw.write(NOSCHEME);
167 fw.flush();
168 fw.close();
169 boolean found = false;
170 SeedCachingScope sl = new UnitTestSeedCachingScope(seedsfile);
171 for (Iterator i = sl.seedsIterator(); i.hasNext();) {
172 UURI uuri = (UURI)i.next();
173 if (uuri.getHost() == null) {
174 continue;
175 }
176 if (uuri.getHost().equals(NOSCHEME)) {
177 found = true;
178 break;
179 }
180 }
181 assertTrue("Did not find " + NOSCHEME, found);
182 }
183
184 private SeedCachingScope checkContent(Set seedSet) {
185 return checkContent(null, seedSet);
186 }
187
188 private SeedCachingScope checkContent(SeedCachingScope sl, Set seedSet) {
189 if (sl == null) {
190 sl = new UnitTestSeedCachingScope(this.seedsfile);
191 }
192 int count = 0;
193 for (Iterator i = sl.seedsIterator(); i.hasNext();) {
194 count++;
195 UURI uuri = (UURI)i.next();
196 assertTrue("Does not contain: " + uuri.toString(),
197 seedSet.contains(uuri));
198 }
199 assertTrue("Different sizes: " + count + ", " + seedSet.size(),
200 count == seedSet.size());
201 return sl;
202 }
203 }
204