1 /* AbstractFrontier 2 * 3 * $Id: AbstractFrontier.java 5053 2007-04-10 02:34:20Z gojomo $ 4 * 5 * Created on June 18, 2007 6 * 7 * Copyright (C) 2007 Internet Archive. 8 * 9 * This file is part of the Heritrix web crawler (crawler.archive.org). 10 * 11 * Heritrix is free software; you can redistribute it and/or modify 12 * it under the terms of the GNU Lesser Public License as published by 13 * the Free Software Foundation; either version 2.1 of the License, or 14 * any later version. 15 * 16 * Heritrix is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 19 * GNU Lesser Public License for more details. 20 * 21 * You should have received a copy of the GNU Lesser Public License 22 * along with Heritrix; if not, write to the Free Software 23 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 24 */ 25 package org.archive.crawler.frontier; 26 27 import org.archive.crawler.datamodel.CandidateURI; 28 import org.archive.crawler.framework.CrawlController; 29 import org.archive.net.PublicSuffixes; 30 31 /*** 32 * Create a queueKey based on the SURT authority, reduced to the 33 * public-suffix-plus-one domain (topmost assignable domain). 34 * 35 * @author gojomo 36 */ 37 public class TopmostAssignedSurtQueueAssignmentPolicy extends 38 SurtAuthorityQueueAssignmentPolicy { 39 40 @Override 41 public String getClassKey(CrawlController controller, CandidateURI cauri) { 42 String candidate = super.getClassKey(controller, cauri); 43 candidate = PublicSuffixes.reduceSurtToTopmostAssigned(candidate); 44 return candidate; 45 } 46 47 }