View Javadoc

1   /* PortnumberCriteria
2    *
3    * $Id: PortnumberCriteria.java 3704 2005-07-18 17:30:21Z stack-sf $
4    *
5    * Created on Apr 8, 2004
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.settings.refinements;
26  
27  import org.archive.net.UURI;
28  
29  
30  /***
31   * A refinement criterion that checks if a URI matches a specific port number.
32   * <p/>
33   * If the port number is not known it will try to use the default port number
34   * for the URI's scheme.
35   *
36   * @author John Erik Halse
37   */
38  public class PortnumberCriteria implements Criteria {
39      private int portNumber = 0;
40  
41      /***
42       * Create a new instance of PortnumberCriteria.
43       */
44      public PortnumberCriteria() {
45          super();
46      }
47  
48      /***
49       * Create a new instance of PortnumberCriteria.
50       *
51       * @param portNumber the port number for this criteria.
52       */
53      public PortnumberCriteria(String portNumber) {
54          setPortNumber(portNumber);
55      }
56  
57      /* (non-Javadoc)
58       * @see org.archive.crawler.settings.refinements.Criteria#isWithinRefinementBounds(org.archive.crawler.datamodel.UURI, int)
59       */
60      public boolean isWithinRefinementBounds(UURI uri) {
61          int port = uri.getPort();
62          if (port < 0) {
63              if (uri.getScheme().equals("http")) {
64                  port = 80;
65              } else if (uri.getScheme().equals("https")) {
66                  port = 443;
67              }
68          }
69  
70          return (port == portNumber)? true: false;
71      }
72  
73      /***
74       * Get the port number that is to be checked against a URI.
75       *
76       * @return Returns the portNumber.
77       */
78      public String getPortNumber() {
79          return String.valueOf(portNumber);
80      }
81      /***
82       * Set the port number that is to be checked against a URI.
83       *
84       * @param portNumber The portNumber to set.
85       */
86      public void setPortNumber(String portNumber) {
87          this.portNumber = Integer.parseInt(portNumber);
88      }
89  
90      /* (non-Javadoc)
91       * @see org.archive.crawler.settings.refinements.Criteria#getName()
92       */
93      public String getName() {
94          return "Port number criteria";
95      }
96  
97      /* (non-Javadoc)
98       * @see org.archive.crawler.settings.refinements.Criteria#getDescription()
99       */
100     public String getDescription() {
101         return "Accept URIs on port " + getPortNumber();
102     }
103 }