View Javadoc

1   /* BdbMultipleWorkQueuesTest
2   *
3   * $Id: BdbMultipleWorkQueuesTest.java 4161 2006-01-30 23:10:35Z gojomo $
4   *
5   * Created on Jul 21, 2005
6   *
7   * Copyright (C) 2005 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */ 
25  package org.archive.crawler.frontier;
26  
27  import org.apache.commons.httpclient.URIException;
28  import org.archive.crawler.datamodel.CandidateURI;
29  import org.archive.crawler.datamodel.CrawlURI;
30  import org.archive.net.UURIFactory;
31  
32  import com.sleepycat.je.tree.Key;
33  
34  import junit.framework.TestCase;
35  
36  /***
37   * Unit tests for BdbMultipleWorkQueues functionality. 
38   * 
39   * @author gojomo
40   */
41  public class BdbMultipleWorkQueuesTest extends TestCase {
42      
43      /***
44       * Basic sanity checks for calculateInsertKey() -- ensure ordinal, cost,
45       * and schedulingDirective have the intended effects, for ordinal values
46       * up through 1/4th of the maximum (about 2^61).
47       * 
48       * @throws URIException
49       */
50      public void testCalculateInsertKey() throws URIException {
51          for (long ordinalOrigin = 1; ordinalOrigin < Long.MAX_VALUE / 4; ordinalOrigin <<= 1) {
52              CandidateURI cauri1 = 
53                  new CandidateURI(UURIFactory.getInstance("http://archive.org/foo"));
54              CrawlURI curi1 = new CrawlURI(cauri1, ordinalOrigin);
55              curi1.setClassKey("foo");
56              byte[] key1 = 
57                  BdbMultipleWorkQueues.calculateInsertKey(curi1).getData();
58              CandidateURI cauri2 = 
59                  new CandidateURI(UURIFactory.getInstance("http://archive.org/bar"));
60              CrawlURI curi2 = new CrawlURI(cauri2, ordinalOrigin + 1);
61              curi2.setClassKey("foo");
62              byte[] key2 = 
63                  BdbMultipleWorkQueues.calculateInsertKey(curi2).getData();
64              CandidateURI cauri3 = 
65                  new CandidateURI(UURIFactory.getInstance("http://archive.org/baz"));
66              CrawlURI curi3 = new CrawlURI(cauri3, ordinalOrigin + 2);
67              curi3.setClassKey("foo");
68              curi3.setSchedulingDirective(CandidateURI.HIGH);
69              byte[] key3 = 
70                  BdbMultipleWorkQueues.calculateInsertKey(curi3).getData();
71              CandidateURI cauri4 = 
72                  new CandidateURI(UURIFactory.getInstance("http://archive.org/zle"));
73              CrawlURI curi4 = new CrawlURI(cauri4, ordinalOrigin + 3);
74              curi4.setClassKey("foo");
75              curi4.setHolderCost(2);
76              byte[] key4 = 
77                  BdbMultipleWorkQueues.calculateInsertKey(curi4).getData();
78              CandidateURI cauri5 = 
79                  new CandidateURI(UURIFactory.getInstance("http://archive.org/gru"));
80              CrawlURI curi5 = new CrawlURI(cauri5, ordinalOrigin + 4);
81              curi5.setClassKey("foo");
82              curi5.setHolderCost(1);
83              byte[] key5 = 
84                  BdbMultipleWorkQueues.calculateInsertKey(curi5).getData();
85              // ensure that key1 (with lower ordinal) sorts before key2 (higher
86              // ordinal)
87              assertTrue("lower ordinal sorting first (" + ordinalOrigin + ")",
88                      Key.compareKeys(key1, key2, null) < 0);
89              // ensure that key3 (with HIGH scheduling) sorts before key2 (even
90              // though
91              // it has lower ordinal)
92              assertTrue("lower directive sorting first (" + ordinalOrigin + ")",
93                      Key.compareKeys(key3, key2, null) < 0);
94              // ensure that key5 (with lower cost) sorts before key4 (even though 
95              // key4  has lower ordinal and same default NORMAL scheduling directive)
96              assertTrue("lower cost sorting first (" + ordinalOrigin + ")", Key
97                      .compareKeys(key5, key4, null) < 0);
98          }
99      }
100 }