1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 package org.archive.crawler.deciderules;
22
23 import org.archive.crawler.datamodel.CandidateURI;
24 import org.archive.crawler.datamodel.CrawlURI;
25 import org.archive.crawler.framework.Frontier;
26 import org.archive.crawler.frontier.WorkQueue;
27
28 /***
29 * Applies configured decision to every candidate URI that would
30 * overbudget its queue. (total expended + pending > total budget).
31 * This rule has no impact on allready enqueued URIs, thus
32 * the right place to use it is the DecidingScope (triggered via LinksScoper)
33 *
34 * (Originally named QueueSizeLimiterDecideRule).
35 * @author Olaf Freyer
36 */
37 public class QueueOverbudgetDecideRule extends PredicatedDecideRule {
38
39 private static final long serialVersionUID = 5165201864629344642L;
40
41 public QueueOverbudgetDecideRule(String name) {
42 super(name);
43 setDescription("QueueOverbudgetDecideRule. "
44 + "Applies configured decision to every candidate URI that would "
45 + "overbudget its queue. (total expended + pending > total budget)."
46 + "This rule has no impact on already enqueued URIs, thus "
47 + "the right place to use it is the DecidingScope (triggered via LinksScoper) ");
48 }
49
50 @Override
51 protected boolean evaluate(Object object) {
52 if(! (object instanceof CandidateURI)) {
53 return false;
54 }
55
56 CandidateURI caUri = (CandidateURI) object;
57 Frontier frontier = getController().getFrontier();
58
59 CrawlURI curi;
60 if (caUri instanceof CrawlURI) {
61
62
63 return false;
64 } else {
65 curi = new CrawlURI(caUri.getUURI());
66 curi.setClassKey(frontier.getClassKey(curi));
67 }
68 WorkQueue wq = (WorkQueue) frontier.getGroup(curi);
69 return (wq.getPendingExpenditure() + wq.getTotalExpenditure())
70 > wq.getTotalBudget();
71 }
72 }