1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 package org.archive.crawler.url.canonicalize;
24
25 import java.util.regex.Pattern;
26
27
28 /***
29 * Strip cold fusion session ids.
30 * @author stack
31 * @version $Date: 2006-09-25 20:27:35 +0000 (Mon, 25 Sep 2006) $, $Revision: 4655 $
32 */
33 public class StripSessionCFIDs
34 extends BaseRule {
35
36 private static final long serialVersionUID = 9122689291157731293L;
37
38 private static final String REGEX = "^(.+)" +
39 "(?:cfid=[^&]+&cftoken=[^&]+(?:jsession=[^&]+)?)(?:&(.*))?$";
40
41 private static final String DESCRIPTION = "Strip ColdFusion session IDs. " +
42 "Use this rule to remove sessionids that look like the following: " +
43 "CFID=12412453&CFTOKEN=15501799 or " +
44 "CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A " +
45 "using the following case-insensitive regex: " + REGEX;
46
47 /***
48 * Examples:
49 * <pre>
50 * Examples:
51 * boo?CFID=1169580&CFTOKEN=48630702&dtstamp=22%2F08%2F2006%7C06%3A58%3A11
52 * boo?CFID=12412453&CFTOKEN=15501799&dt=19_08_2006_22_39_28
53 * boo?CFID=14475712&CFTOKEN=2D89F5AF-3048-2957-DA4EE4B6B13661AB&r=468710288378&m=forgotten
54 * boo?CFID=16603925&CFTOKEN=2AE13EEE-3048-85B0-56CEDAAB0ACA44B8&r=501652357733&l1=home
55 * boo?CFID=3304324&CFTOKEN=57491900&jsessionid=a63098d96360$B0$D9$A
56 * </pre>
57 */
58 private static final Pattern COLDFUSION_PATTERN =
59 Pattern.compile(REGEX, Pattern.CASE_INSENSITIVE);
60
61
62 public StripSessionCFIDs(String name) {
63 super(name, DESCRIPTION);
64 }
65
66 public String canonicalize(String url, Object context) {
67 return doStripRegexMatch(url, COLDFUSION_PATTERN.matcher(url));
68 }
69 }