View Javadoc

1   /*
2    * ExtractorURITest
3    *
4    * $Id: ExtractorURITest.java 4595 2006-09-02 00:43:59Z gojomo $
5    *
6    * Created on August 30, 2006
7    *
8    * Copyright (C) 2006 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   */
26  package org.archive.crawler.extractor;
27  
28  import java.util.List;
29  
30  import org.archive.net.UURI;
31  
32  import junit.framework.TestCase;
33  
34  /***
35   * Test ExtractorURI
36   * 
37   * @author gojomo
38   */
39  public class ExtractorURITest extends TestCase {
40      
41      public void testFullQuery() {
42          String queryStringUri = "http://www.example2.com";
43          innerTestQueryString(queryStringUri,queryStringUri);
44      }
45  
46      public void testFullQueryEncoded() {
47          String queryStringUri = "http%3A//www.example2.com/";
48          String expectedUri = "http://www.example2.com/";
49          innerTestQueryString(queryStringUri,expectedUri);
50      }
51      
52      public void testFullQueryEncodedComplex() {
53          String queryStringUri = "http%3A//www.example2.com/foo%3Fbar%3Dbz%26red%3Dblue";
54          String expectedUri = "http://www.example2.com/foo?bar=bz&red=blue";
55          innerTestQueryString(queryStringUri,expectedUri);
56      }
57      
58      private void innerTestQueryString(String queryStringUri, String expectedUri) {
59          UURI uuri = UURI.from(
60                  "http://www.example.com/foo?"+queryStringUri);
61          innerTestForPresence(uuri, expectedUri);
62      }
63  
64      private void innerTestForPresence(UURI uuri, String expectedUri) {
65          List<String> results = ExtractorURI.extractQueryStringLinks(uuri);
66          assertTrue(
67                  "URI not found: "+expectedUri,
68                  results.contains(expectedUri));
69      }
70      
71      public void testParameterComplex() {
72          String parameterUri = "http%3A//www.example2.com/foo%3Fbar%3Dbz%26red%3Dblue";
73          String expectedUri = "http://www.example2.com/foo?bar=bz&red=blue";
74          UURI uuri = UURI.from(
75                  "http://www.example.com/foo?uri="+parameterUri+"&foo=bar");
76          innerTestForPresence(uuri,expectedUri);
77      }
78  }