1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.url.canonicalize;
24
25 import org.apache.commons.httpclient.URIException;
26 import org.archive.net.UURIFactory;
27
28 import junit.framework.TestCase;
29
30 /***
31 * Test stripping 'www' if present.
32 * @author stack
33 * @version $Date: 2006-06-12 18:19:23 +0000 (Mon, 12 Jun 2006) $, $Revision: 4280 $
34 */
35 public class StripWWWRuleTest extends TestCase {
36
37 public void testCanonicalize() throws URIException {
38 String url = "http://WWW.aRchive.Org/index.html";
39 String expectedResult = "http://aRchive.Org/index.html";
40 String result = (new StripWWWRule("test")).
41 canonicalize(url, UURIFactory.getInstance(url));
42 assertTrue("Failed " + result, expectedResult.equals(result));
43 url = "http://wWWW.aRchive.Org/index.html";
44 expectedResult = "http://wWWW.aRchive.Org/index.html";
45 result = (new StripWWWRule("test")).
46 canonicalize(url, UURIFactory.getInstance(url));
47 assertTrue("Failed " + result, expectedResult.equals(result));
48 url = "http://ww.aRchive.Org/index.html";
49 expectedResult = "http://ww.aRchive.Org/index.html";
50 result = (new StripWWWRule("test")).
51 canonicalize(url, UURIFactory.getInstance(url));
52 assertTrue("Failed " + result, expectedResult.equals(result));
53 url = "http://www001.aRchive.Org/index.html";
54 expectedResult = "http://www001.aRchive.Org/index.html";
55 result = (new StripWWWRule("test")).
56 canonicalize(url, UURIFactory.getInstance(url));
57 assertTrue("Failed " + result, expectedResult.equals(result));
58 }
59 }