View Javadoc

1   /* StripSessionIDsTest
2    * 
3    * Created on Oct 6, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.url.canonicalize;
24  
25  import org.apache.commons.httpclient.URIException;
26  import org.archive.net.UURIFactory;
27  
28  import junit.framework.TestCase;
29  
30  /***
31   * Test stripping of session ids.
32   * @author stack
33   * @version $Date: 2006-09-01 22:44:50 +0000 (Fri, 01 Sep 2006) $, $Revision: 4591 $
34   */
35  public class StripSessionIDsTest extends TestCase {
36      private static final String  BASE = "http://www.archive.org/index.html";
37      public void testCanonicalize() throws URIException {
38          String str32id = "0123456789abcdefghijklemopqrstuv";
39          String url = BASE + "?jsessionid=" + str32id;
40          String expectedResult = BASE + "?";
41          String result = (new StripSessionIDs("test")).
42              canonicalize(url, UURIFactory.getInstance(url));
43          assertTrue("Failed " + result, expectedResult.equals(result));
44          
45          // Test that we don't strip if not 32 chars only.
46          url = BASE + "?jsessionid=" + str32id + '0';
47          expectedResult = url;
48          result = (new StripSessionIDs("test")).
49              canonicalize(url, UURIFactory.getInstance(url));
50          assertTrue("Failed " + result, expectedResult.equals(result));
51          
52          // Test what happens when followed by another key/value pair.
53          url = BASE + "?jsessionid=" + str32id + "&x=y";
54          expectedResult = BASE + "?x=y";
55          result = (new StripSessionIDs("test")).
56              canonicalize(url, UURIFactory.getInstance(url));
57          assertTrue("Failed " + result, expectedResult.equals(result));
58          
59          // Test what happens when followed by another key/value pair and
60          // prefixed by a key/value pair.
61          url = BASE + "?one=two&jsessionid=" + str32id + "&x=y";
62          expectedResult = BASE + "?one=two&x=y";
63          result = (new StripSessionIDs("test")).
64              canonicalize(url, UURIFactory.getInstance(url));
65          assertTrue("Failed " + result, expectedResult.equals(result));
66          
67          // Test what happens when prefixed by a key/value pair.
68          url = BASE + "?one=two&jsessionid=" + str32id;
69          expectedResult = BASE + "?one=two&";
70          result = (new StripSessionIDs("test")).
71              canonicalize(url, UURIFactory.getInstance(url));
72          assertTrue("Failed " + result, expectedResult.equals(result));
73          
74          // Test aspsession.
75          url = BASE + "?aspsessionidABCDEFGH=" + "ABCDEFGHIJKLMNOPQRSTUVWX"
76              + "&x=y";
77          expectedResult = BASE + "?x=y";
78          result = (new StripSessionIDs("test")).
79              canonicalize(url, UURIFactory.getInstance(url));
80          assertTrue("Failed " + result, expectedResult.equals(result));
81          
82          // Test archive phpsession.
83          url = BASE + "?phpsessid=" + str32id + "&x=y";
84          expectedResult = BASE + "?x=y";
85          result = (new StripSessionIDs("test")).
86              canonicalize(url, UURIFactory.getInstance(url));
87          assertTrue("Failed " + result, expectedResult.equals(result));
88          
89          // With prefix too.
90          url = BASE + "?one=two&phpsessid=" + str32id + "&x=y";
91          expectedResult = BASE + "?one=two&x=y";
92          result = (new StripSessionIDs("test")).
93              canonicalize(url, UURIFactory.getInstance(url));
94          assertTrue("Failed " + result, expectedResult.equals(result));
95          
96          // With only prefix
97          url = BASE + "?one=two&phpsessid=" + str32id;
98          expectedResult = BASE + "?one=two&";
99          result = (new StripSessionIDs("test")).
100             canonicalize(url, UURIFactory.getInstance(url));
101         assertTrue("Failed " + result, expectedResult.equals(result));
102         
103         // Test sid.
104         url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&x=y";
105         expectedResult = BASE + "?x=y";
106         result = (new StripSessionIDs("test")).
107             canonicalize(url, UURIFactory.getInstance(url));
108         assertTrue("Failed " + result, expectedResult.equals(result));	
109         
110         // Igor test.
111         url = BASE + "?" + "sid=9682993c8daa2c5497996114facdc805" + "&" +
112             "jsessionid=" + str32id;
113         expectedResult = BASE + "?";
114         result = (new StripSessionIDs("test")).
115             canonicalize(url, UURIFactory.getInstance(url));
116         assertTrue("Failed " + result, expectedResult.equals(result));  
117     }
118 }