View Javadoc

1   /* StripWWWNRuleTest
2    * 
3    * Created on Oct 6, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.url.canonicalize;
24  
25  import org.apache.commons.httpclient.URIException;
26  import org.archive.net.UURIFactory;
27  
28  import junit.framework.TestCase;
29  
30  /***
31   * Test stripping 'www' if present.
32   * @author stack
33   * @version $Date: 2006-09-18 20:32:47 +0000 (Mon, 18 Sep 2006) $, $Revision: 4634 $
34   */
35  public class StripWWWNRuleTest extends TestCase {
36  
37      public void testCanonicalize() throws URIException {
38          String url = "http://WWW.aRchive.Org/index.html";
39          String expectedResult = "http://aRchive.Org/index.html";
40          String result = (new StripWWWNRule("test")).
41              canonicalize(url, UURIFactory.getInstance(url));
42          assertTrue("Failed " + result, expectedResult.equals(result));
43          url = "http://www001.aRchive.Org/index.html";
44          result = (new StripWWWNRule("test")).
45              canonicalize(url, UURIFactory.getInstance(url));
46          assertTrue("Failed " + result, expectedResult.equals(result));
47          url = "http://www3.aRchive.Org/index.html";
48          result = (new StripWWWNRule("test")).
49              canonicalize(url, UURIFactory.getInstance(url));
50          assertTrue("Failed " + result, expectedResult.equals(result));
51      }
52  }