1 /* 2 * ExtractorURITest 3 * 4 * $Id: ExtractorImpliedURITest.java 4667 2006-09-26 20:38:48Z paul_jack $ 5 * 6 * Created on August 30, 2006 7 * 8 * Copyright (C) 2006 Internet Archive. 9 * 10 * This file is part of the Heritrix web crawler (crawler.archive.org). 11 * 12 * Heritrix is free software; you can redistribute it and/or modify 13 * it under the terms of the GNU Lesser Public License as published by 14 * the Free Software Foundation; either version 2.1 of the License, or 15 * any later version. 16 * 17 * Heritrix is distributed in the hope that it will be useful, 18 * but WITHOUT ANY WARRANTY; without even the implied warranty of 19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 * GNU Lesser Public License for more details. 21 * 22 * You should have received a copy of the GNU Lesser Public License 23 * along with Heritrix; if not, write to the Free Software 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 25 */ 26 package org.archive.crawler.extractor; 27 28 29 import junit.framework.TestCase; 30 31 /*** 32 * Test ExtractorImpliedURI 33 * 34 * @author gojomo 35 */ 36 public class ExtractorImpliedURITest extends TestCase { 37 38 public void testYouTubeExample() { 39 String startUri = 40 "http://youtube.com/player2.swf?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904"; 41 String expectedUri = 42 "http://youtube.com/get_video?video_id=pv5zWaTEVkI&l=184&t=OEgsToPDskJrxamAv3Xm6ykQPSaw_f-Q&nc=16763904"; 43 // without escaping: ^(http://[\w\.:@]*)/player2.swf\?(.*)$ 44 String triggerPattern = "^(http://[//w//.:@]*)/player2.swf//?(.*)$"; 45 String buildPattern = "$1/get_video?$2"; 46 47 String implied = ExtractorImpliedURI.extractImplied( 48 startUri,triggerPattern,buildPattern); 49 assertEquals(expectedUri,implied); 50 } 51 }