View Javadoc

1   /* StripUserinfoRuleTest
2    * 
3    * Created on Oct 6, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.url.canonicalize;
24  
25  import org.apache.commons.httpclient.URIException;
26  import org.archive.net.UURIFactory;
27  
28  import junit.framework.TestCase;
29  
30  /***
31   * Tests stripping of userinfo from a URL.
32   * @author stack
33   * @version $Date: 2005-07-18 17:30:21 +0000 (Mon, 18 Jul 2005) $, $Revision: 3704 $
34   */
35  public class StripUserinfoRuleTest extends TestCase {
36      public void testCanonicalize() throws URIException {
37          String url = "http://WWW.aRchive.Org/index.html";
38          final String expectedResult = url;
39          String result = (new StripUserinfoRule("test")).
40              canonicalize(url, UURIFactory.getInstance(url));
41          assertTrue("Mangled no userinfo " + result,
42              url.equals(result));
43          url = "http://stack:password@WWW.aRchive.Org/index.html";
44          result = (new StripUserinfoRule("test")).
45              canonicalize(url, UURIFactory.getInstance(url));
46          assertTrue("Didn't strip userinfo " + result,
47              expectedResult.equals(result));
48          url = "http://stack:pass@@@@@@word@WWW.aRchive.Org/index.html";
49          result = (new StripUserinfoRule("test")).
50              canonicalize(url, 
51                  UURIFactory.getInstance("http://archive.org"));
52          assertTrue("Didn't get to last @ " + result,
53              expectedResult.equals(result));
54          url = "ftp://stack:pass@@@@@@word@archive.org/index.html";
55          result = (new StripUserinfoRule("test")).
56              canonicalize(url,
57                  UURIFactory.getInstance("http://archive.org"));
58          assertTrue("Didn't get to last @ " + result,
59              "ftp://archive.org/index.html".equals(result));
60      }
61  }