1 /* PrerequisiteAcceptDecideRule
2 *
3 * $Id: PrerequisiteAcceptDecideRule.java 4649 2006-09-25 17:16:55Z paul_jack $
4 *
5 * Created on Mar 3, 2005
6 *
7 * Copyright (C) 2005 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 package org.archive.crawler.deciderules;
26
27 import org.archive.crawler.datamodel.CandidateURI;
28 import org.archive.crawler.extractor.Link;
29
30
31 /***
32 * Rule which ACCEPTs all 'prerequisite' URIs (those with a 'P' in
33 * the last hopsPath position). Good in a late position to ensure
34 * other scope settings don't lock out necessary prerequisites.
35 *
36 * @author gojomo
37 */
38 public class PrerequisiteAcceptDecideRule extends AcceptDecideRule {
39
40 private static final long serialVersionUID = 2762042167111186142L;
41
42 public PrerequisiteAcceptDecideRule(String name) {
43 super(name);
44 setDescription("PrerequisiteAcceptDecideRule. ACCEPTs " +
45 "all CrawlURIs discovered via a prerequisite " +
46 "'link'.");
47 }
48
49 public Object decisionFor(Object object) {
50 try {
51 String hopsPath = ((CandidateURI)object).getPathFromSeed();
52 if (hopsPath != null && hopsPath.length() > 0 &&
53 hopsPath.charAt(hopsPath.length()-1) == Link.PREREQ_HOP) {
54 return ACCEPT;
55 }
56 } catch (ClassCastException e) {
57 // Do nothing
58 }
59 return PASS;
60 }
61 }