View Javadoc

1   /* SubElement
2   *
3   * $Id: SubElement.java 4465 2006-08-08 18:25:42Z stack-sf $
4   *
5   * Created on July 26, 2006.
6   *
7   * Copyright (C) 2006 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.util.anvl;
26  
27  /***
28   * Abstract ANVL 'data element' sub-part.
29   * Subclass to make a Comment, a Label, or a Value.
30   * @author stack
31   */
32  abstract class SubElement {
33      private final String e;
34  
35      protected SubElement() {
36          this(null);
37      }
38  
39      public SubElement(final String s) {
40          this.e = baseCheck(s);
41      }
42  
43      protected String baseCheck(final String s) {
44          // Check for null.
45          if (s == null || s.length() <= 0) {
46              throw new IllegalArgumentException("Can't be null or empty");
47          }
48          // Check for CRLF.
49          for (int i = 0; i < s.length(); i++) {
50              checkCharacter(s.charAt(i), s, i);
51          }
52          return s;
53      }
54      
55      protected void checkCharacter(final char c, final String srcStr,
56      		final int index) {
57          checkControlCharacter(c, srcStr, index);
58          checkCRLF(c, srcStr, index);
59      }
60      
61      protected void checkControlCharacter(final char c, final String srcStr,
62              final int index) {
63          if (Character.isISOControl(c) && !Character.isWhitespace(c) ||
64                  !Character.isValidCodePoint(c)) {
65              throw new IllegalArgumentException(srcStr +
66                  " contains a control character(s) or invalid code point: 0x" +
67                  Integer.toHexString(c));
68          }
69      }
70      
71      protected void checkCRLF(final char c, final String srcStr,
72              final int index) {
73          if (ANVLRecord.isCROrLF(c)) {
74              throw new IllegalArgumentException(srcStr +
75                  " contains disallowed CRLF control character(s): 0x" +
76                  Integer.toHexString(c));
77          }
78      }
79      
80      @Override
81      public String toString() {
82          return e;
83      }
84  }