View Javadoc

1   /*
2    * RegularExpressionConstraint
3    *
4    * $Id: RegularExpressionConstraint.java 3666 2005-07-06 19:23:52Z stack-sf $
5    *
6    * Created on Mar 31, 2004
7    *
8    * Copyright (C) 2004 Internet Archive.
9    *
10   * This file is part of the Heritrix web crawler (crawler.archive.org).
11   *
12   * Heritrix is free software; you can redistribute it and/or modify it under the
13   * terms of the GNU Lesser Public License as published by the Free Software
14   * Foundation; either version 2.1 of the License, or any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY
17   * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
18   * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details.
19   *
20   * You should have received a copy of the GNU Lesser Public License along with
21   * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple
22   * Place, Suite 330, Boston, MA 02111-1307 USA
23   */
24  package org.archive.crawler.settings;
25  
26  import java.io.Serializable;
27  import java.util.logging.Level;
28  
29  import org.archive.util.TextUtils;
30  
31  /***
32   * A constraint that checks that a value matches a regular expression. This
33   * constraint can only be applied to textual attributes.
34   *
35   * @author John Erik Halse
36   */
37  public class RegularExpressionConstraint
38  extends Constraint implements Serializable {
39      private static final long serialVersionUID = -5916211981136071809L;
40      private final String pattern;
41  
42      /***
43       * Constructs a new RegularExpressionConstraint.
44       *
45       * @param pattern the regular expression pattern the value must match.
46       * @param level the severity level.
47       * @param msg the default error message.
48       */
49      public RegularExpressionConstraint(String pattern, Level level, String msg) {
50          super(level, msg);
51          this.pattern = pattern;
52      }
53  
54      /***
55       * Constructs a new RegularExpressionConstraint using default severity level
56       * ({@link Level#WARNING}).
57       *
58       * @param pattern the regular expression pattern the value must match.
59       * @param msg the default error message.
60       */
61      public RegularExpressionConstraint(String pattern, String msg) {
62          this(pattern, Level.WARNING, msg);
63      }
64  
65      /***
66       * Constructs a new RegularExpressionConstraint using the default error
67       * message.
68       *
69       * @param pattern the regular expression pattern the value must match.
70       * @param level the severity level.
71       */
72      public RegularExpressionConstraint(String pattern, Level level) {
73          this(pattern, level, "Value did not match pattern: \"" + pattern + "\"");
74      }
75  
76      /***
77       * Constructs a new RegularExpressionConstraint.
78       *
79       * @param pattern the regular expression pattern the value must match.
80       */
81      public RegularExpressionConstraint(String pattern) {
82          this(pattern, Level.WARNING);
83      }
84  
85      /*
86       * (non-Javadoc)
87       *
88       * @see org.archive.crawler.settings.Constraint#innerCheck(org.archive.crawler.settings.Type,
89       *      javax.management.Attribute)
90       */
91      public FailedCheck innerCheck(CrawlerSettings settings, ComplexType owner,
92              Type definition, Object value) {
93          if (value instanceof CharSequence) {
94              if (!TextUtils
95                      .matches(pattern, (CharSequence) value)) {
96                  return new FailedCheck(settings, owner, definition, value);
97  
98              }
99          } else {
100             return new FailedCheck(settings, owner, definition, value,
101                     "Can't do regexp on non CharSequence.");
102         }
103         return null;
104     }
105 
106 }