View Javadoc

1   /* $Id:  $
2    *
3    * Copyright (C) 2007 Olaf Freyer
4    *
5    * This file is part of the Heritrix web crawler (crawler.archive.org).
6    *
7    * Heritrix is free software; you can redistribute it and/or modify
8    * it under the terms of the GNU Lesser Public License as published by
9    * the Free Software Foundation; either version 2.1 of the License, or
10   * any later version.
11   *
12   * Heritrix is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15   * GNU Lesser Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser Public License
18   * along with Heritrix; if not, write to the Free Software
19   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20   */
21  package org.archive.crawler.deciderules;
22  
23  import org.archive.crawler.datamodel.CandidateURI;
24  import org.archive.crawler.datamodel.CrawlURI;
25  import org.archive.crawler.framework.Frontier;
26  import org.archive.crawler.frontier.WorkQueue;
27  
28  /***
29   * Applies configured decision to every candidate URI that would 
30   * overbudget its queue. (total expended + pending > total budget).
31   * This rule has no impact on allready enqueued URIs, thus
32   * the right place to use it is the DecidingScope (triggered via LinksScoper)
33   * 
34   * (Originally named QueueSizeLimiterDecideRule).
35   * @author Olaf Freyer
36   */
37  public class QueueOverbudgetDecideRule extends PredicatedDecideRule {
38  
39      private static final long serialVersionUID = 5165201864629344642L;
40  
41      public QueueOverbudgetDecideRule(String name) {
42          super(name);
43          setDescription("QueueOverbudgetDecideRule. "
44                  + "Applies configured decision to every candidate URI that would "
45                  + "overbudget its queue. (total expended + pending > total budget)."
46                  + "This rule has no impact on already enqueued URIs, thus "
47                  + "the right place to use it is the DecidingScope (triggered via LinksScoper) ");
48      }
49  
50      @Override
51      protected boolean evaluate(Object object) {
52          if(! (object instanceof CandidateURI)) {
53              return false; 
54          }
55          
56          CandidateURI caUri = (CandidateURI) object;
57          Frontier frontier = getController().getFrontier();
58  
59          CrawlURI curi;
60          if (caUri instanceof CrawlURI) {
61              // this URI already has been enqueued - don't change previous
62              // decision
63              return false;
64          } else {
65              curi = new CrawlURI(caUri.getUURI());
66              curi.setClassKey(frontier.getClassKey(curi));
67          }
68          WorkQueue wq = (WorkQueue) frontier.getGroup(curi);
69          return (wq.getPendingExpenditure() + wq.getTotalExpenditure()) 
70                      > wq.getTotalBudget();
71      }
72  }