View Javadoc

1   /*
2    * DomainScopeTest
3    *
4    * $Id: DomainScopeTest.java 4651 2006-09-25 18:31:13Z paul_jack $
5    *
6    * Created on May 17, 2004
7    *
8    * Copyright (C) 2004 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify
13   * it under the terms of the GNU Lesser Public License as published by
14   * the Free Software Foundation; either version 2.1 of the License, or
15   * any later version.
16   *
17   * Heritrix is distributed in the hope that it will be useful,
18   * but WITHOUT ANY WARRANTY; without even the implied warranty of
19   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20   * GNU Lesser Public License for more details.
21   *
22   * You should have received a copy of the GNU Lesser Public License
23   * along with Heritrix; if not, write to the Free Software
24   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
25   */
26  
27  package org.archive.crawler.scope;
28  
29  import java.util.ArrayList;
30  import java.util.Iterator;
31  
32  import junit.framework.TestCase;
33  
34  import org.apache.commons.httpclient.URIException;
35  import org.archive.net.UURI;
36  import org.archive.net.UURIFactory;
37  
38  /***
39   * Test the domain scope focus filter.
40   *
41   * @author Igor Ranitovic
42   */
43  public class DomainScopeTest extends TestCase {
44  
45      private ArrayList<UURI> testSeeds;
46      private ArrayList<UURI> urlsInScope;
47      private ArrayList<UURI> urlsOutOfScope;
48  
49      private TestUnitDomainScope dc;
50  
51      /***
52       * Since testing only focus filter overwrite all other filter to return
53       * false.
54       *
55       * Also override seedsIterator so the test seeds are used.
56       */
57      @SuppressWarnings("deprecation")
58      private class TestUnitDomainScope extends DomainScope {
59  
60          private static final long serialVersionUID = 2509499903112690451L;
61  
62          public TestUnitDomainScope(String name) {
63              super(name);
64          }
65          
66          /* Force test seeds to be used. 
67           * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
68           */
69          public Iterator<UURI> seedsIterator() {
70              return testSeeds.iterator();
71          }
72          
73          protected boolean additionalFocusAccepts(Object o) {
74              return false;
75          }
76  
77          protected boolean transitiveAccepts(Object o) {
78              return false;
79          }
80  
81          protected boolean excludeAccepts(Object o) {
82              return false;
83          }
84      }
85  
86      public void setUp() throws URIException {
87          testSeeds = new ArrayList<UURI>();
88          urlsInScope = new ArrayList<UURI>();
89          urlsOutOfScope = new ArrayList<UURI>();
90          dc = new TestUnitDomainScope("TESTCASE");
91  
92          // Add seeds
93          addURL(testSeeds, "http://www.a.com/");
94          addURL(testSeeds, "http://b.com/");
95          addURL(testSeeds, "http://www11.c.com");
96          addURL(testSeeds, "http://www.x.y.z.com/index.html");
97          addURL(testSeeds, "http://www.1.com/index.html");
98          addURL(testSeeds, "http://www.a_b.com/index.html");
99  
100 
101         // Add urls in domain scope
102         addURL(urlsInScope, "http://www.a.com/");
103         addURL(urlsInScope, "http://www1.a.com/");
104         addURL(urlsInScope, "http://a.com/");
105         addURL(urlsInScope, "http://a.a.com/");
106 
107         addURL(urlsInScope, "http://www.b.com/");
108         addURL(urlsInScope, "http://www1.b.com/");
109         addURL(urlsInScope, "http://b.com/");
110         addURL(urlsInScope, "http://b.b.com/");
111 
112         addURL(urlsInScope, "http://www.c.com/");
113         addURL(urlsInScope, "http://www1.c.com/");
114         addURL(urlsInScope, "http://c.com/");
115         addURL(urlsInScope, "http://c.c.com/");
116 
117         addURL(urlsInScope, "http://www.x.y.z.com/");
118         addURL(urlsInScope, "http://www1.x.y.z.com/");
119         addURL(urlsInScope, "http://x.y.z.com/");
120         addURL(urlsInScope, "http://xyz.x.y.z.com/");
121         addURL(urlsInScope, "http://1.com/index.html");
122         addURL(urlsInScope, "http://a_b.com/index.html");
123 
124         // Add urls out of scope
125         addURL(urlsOutOfScope, "http://a.co");
126         addURL(urlsOutOfScope, "http://a.comm");
127         addURL(urlsOutOfScope, "http://aa.com");
128         addURL(urlsOutOfScope, "http://z.com");
129         addURL(urlsOutOfScope, "http://y.z.com");
130     }
131 
132     public void addURL(ArrayList<UURI> list, String url) throws URIException {
133         list.add(UURIFactory.getInstance(url));
134     }
135 
136     public void testInScope() throws URIException {
137         for (Iterator i = this.urlsInScope.iterator(); i.hasNext();) {
138             Object url = i.next();
139             assertTrue("Should be in domain scope: " + url, dc.accepts(url));
140         }
141     }
142 
143     public void testOutOfScope() throws URIException {
144         for (Iterator i = this.urlsOutOfScope.iterator(); i.hasNext();) {
145             Object url = i.next();
146             assertFalse(
147                 "Should not be in domain scope: " + url,
148                 dc.accepts(url));
149         }
150     }
151 }