1 /* Copyright (C) 2003 Internet Archive.
2 *
3 * This file is part of the Heritrix web crawler (crawler.archive.org).
4 *
5 * Heritrix is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser Public License as published by
7 * the Free Software Foundation; either version 2.1 of the License, or
8 * any later version.
9 *
10 * Heritrix is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser Public License
16 * along with Heritrix; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 *
19 * SeedCachingScope.java
20 * Created on Mar 25, 2005
21 *
22 * $Header$
23 */
24 package org.archive.crawler.scope;
25
26 import java.util.ArrayList;
27 import java.util.Iterator;
28 import java.util.List;
29
30 import org.archive.crawler.datamodel.CrawlURI;
31 import org.archive.net.UURI;
32
33 /***
34 * A CrawlScope that caches its seed list for the
35 * convenience of scope-tests that are based on the
36 * seeds.
37 *
38 * @author gojomo
39 *
40 */
41 public class SeedCachingScope extends ClassicScope {
42
43 private static final long serialVersionUID = 300230673616424926L;
44
45 //private static final Logger logger =
46 // Logger.getLogger(SeedCachingScope.class.getName());
47 List<UURI> seeds;
48
49 public SeedCachingScope(String name) {
50 super(name);
51 }
52
53 /* (non-Javadoc)
54 * @see org.archive.crawler.framework.CrawlScope#addSeed(org.archive.crawler.datamodel.UURI)
55 */
56 public boolean addSeed(CrawlURI curi) {
57 if (super.addSeed(curi) == false) {
58 // failed
59 return false;
60 }
61 // FIXME: This is not thread-safe.
62 List<UURI> newSeeds = new ArrayList<UURI>(seeds);
63 newSeeds.add(curi.getUURI());
64 seeds = newSeeds;
65 return true;
66 }
67
68 /* (non-Javadoc)
69 * @see org.archive.crawler.framework.CrawlScope#refreshSeeds()
70 */
71 public synchronized void refreshSeeds() {
72 super.refreshSeeds();
73 seeds = null;
74 fillSeedsCache();
75 }
76
77 /* (non-Javadoc)
78 * @see org.archive.crawler.framework.CrawlScope#seedsIterator()
79 */
80 public Iterator<UURI> seedsIterator() {
81 fillSeedsCache();
82 return seeds.iterator();
83 }
84
85 /***
86 * Ensure seeds cache is created/filled
87 */
88 protected synchronized void fillSeedsCache() {
89 if (seeds==null) {
90 seeds = new ArrayList<UURI>();
91 Iterator<UURI> iter = super.seedsIterator();
92 while(iter.hasNext()) {
93 seeds.add(iter.next());
94 }
95 }
96 }
97 }