View Javadoc

1   /*
2    * ExtractorImpliedURITest
3    *
4    * $Id: ExtractorImpliedURITest.java 4667 2006-09-26 20:38:48Z paul_jack $
5    *
6    * Created on August 30, 2006
7    *
8    * Copyright (C) 2006 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   */
26  package org.archive.crawler.extractor;
27  
28  
29  import junit.framework.TestCase;
30  
31  /***
32   * Test ExtractorImpliedURI
33   * 
34   * @author gojomo
35   */
36  public class ExtractorImpliedURITest extends TestCase {
37      
38      public void testYouTubeExample() {
39          String startUri = 
40              "http://youtube.com/player2.swf?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
41          String expectedUri = 
42              "http://youtube.com/get_video?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
43          // without escaping: ^(http://[\w\.:@]*)/player2.swf\?(.*)$
44          String triggerPattern = "^(http://[//w//.:@]*)/player2.swf//?(.*)$";
45          String buildPattern = "$1/get_video?$2";
46          
47          String implied = ExtractorImpliedURI.extractImplied(
48                  startUri,triggerPattern,buildPattern);
49          assertEquals(expectedUri,implied);
50      }
51  }