1 /* TopmostAssignedSurtQueueAssignmentPolicy
2 *
3 * $Id: AbstractFrontier.java 5053 2007-04-10 02:34:20Z gojomo $
4 *
5 * Created on June 18, 2007
6 *
7 * Copyright (C) 2007 Internet Archive.
8 *
9 * This file is part of the Heritrix web crawler (crawler.archive.org).
10 *
11 * Heritrix is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU Lesser Public License as published by
13 * the Free Software Foundation; either version 2.1 of the License, or
14 * any later version.
15 *
16 * Heritrix is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser Public License for more details.
20 *
21 * You should have received a copy of the GNU Lesser Public License
22 * along with Heritrix; if not, write to the Free Software
23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24 */
25 package org.archive.crawler.frontier;
26
27 import org.archive.crawler.datamodel.CandidateURI;
28 import org.archive.crawler.framework.CrawlController;
29 import org.archive.net.PublicSuffixes;
30
31 /***
32 * Create a queueKey based on the SURT authority, reduced to the
33 * public-suffix-plus-one domain (topmost assignable domain).
34 *
35 * @author gojomo
36 */
37 public class TopmostAssignedSurtQueueAssignmentPolicy extends
38 SurtAuthorityQueueAssignmentPolicy {
39
40 @Override
41 public String getClassKey(CrawlController controller, CandidateURI cauri) {
42 String candidate = super.getClassKey(controller, cauri);
43 candidate = PublicSuffixes.reduceSurtToTopmostAssigned(candidate);
44 return candidate;
45 }
46
47 }