1 /*
2 * ExtractorImpliedURITest
3 *
4 * $Id: ExtractorImpliedURITest.java 4667 2006-09-26 20:38:48Z paul_jack $
5 *
6 * Created on August 30, 2006
7 *
8 * Copyright (C) 2006 Internet Archive.
9 *
10 * This file is part of the Heritrix web crawler (crawler.archive.org).
11 *
12 * Heritrix is free software; you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser Public License as published by
14 * the Free Software Foundation; either version 2.1 of the License, or
15 * any later version.
16 *
17 * Heritrix is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU Lesser Public License for more details.
21 *
22 * You should have received a copy of the GNU Lesser Public License
23 * along with Heritrix; if not, write to the Free Software
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 */
26 package org.archive.crawler.extractor;
27
28
29 import junit.framework.TestCase;
30
31 /***
32 * Test ExtractorImpliedURI
33 *
34 * @author gojomo
35 */
36 public class ExtractorImpliedURITest extends TestCase {
37
38 public void testYouTubeExample() {
39 String startUri =
40 "http://youtube.com/player2.swf?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
41 String expectedUri =
42 "http://youtube.com/get_video?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904";
43 // without escaping: ^(http://[\w\.:@]*)/player2.swf\?(.*)$
44 String triggerPattern = "^(http://[//w//.:@]*)/player2.swf//?(.*)$";
45 String buildPattern = "$1/get_video?$2";
46
47 String implied = ExtractorImpliedURI.extractImplied(
48 startUri,triggerPattern,buildPattern);
49 assertEquals(expectedUri,implied);
50 }
51 }