View Javadoc

1   /* IPQueueAssignmentPolicy
2   *
3   * $Id: IPQueueAssignmentPolicy.java 4667 2006-09-26 20:38:48Z paul_jack $
4   *
5   * Created on Oct 5, 2004
6   *
7   * Copyright (C) 2004 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */ 
25  package org.archive.crawler.frontier;
26  
27  import org.archive.crawler.datamodel.CandidateURI;
28  import org.archive.crawler.datamodel.CrawlHost;
29  import org.archive.crawler.framework.CrawlController;
30  
31  /***
32   * Uses target IP as basis for queue-assignment, unless it is unavailable,
33   * in which case it behaves as HostnameQueueAssignmentPolicy.
34   * 
35   * @author gojomo
36   */
37  public class IPQueueAssignmentPolicy
38  extends HostnameQueueAssignmentPolicy {
39      public String getClassKey(CrawlController controller, CandidateURI cauri) {
40          CrawlHost host = controller.getServerCache().getHostFor(cauri);
41          if (host == null || host.getIP() == null) {
42              // if no server or no IP, use superclass implementation
43              return super.getClassKey(controller, cauri);
44          }
45          // use dotted-decimal IP address
46          return host.getIP().getHostAddress();
47      }
48  }