View Javadoc

1   /* Value
2   *
3   * $Id: Value.java 4644 2006-09-20 22:40:21Z paul_jack $
4   *
5   * Created on July 26, 2006.
6   *
7   * Copyright (C) 2006 Internet Archive.
8   *
9   * This file is part of the Heritrix web crawler (crawler.archive.org).
10  *
11  * Heritrix is free software; you can redistribute it and/or modify
12  * it under the terms of the GNU Lesser Public License as published by
13  * the Free Software Foundation; either version 2.1 of the License, or
14  * any later version.
15  *
16  * Heritrix is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19  * GNU Lesser Public License for more details.
20  *
21  * You should have received a copy of the GNU Lesser Public License
22  * along with Heritrix; if not, write to the Free Software
23  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24  */
25  package org.archive.util.anvl;
26  
27  /***
28   * TODO: Now values 'fold' but should but perhaps they shouldn't be stored
29   * folded.  Only when we serialize should we fold (But how to know where
30   * to fold?).
31   * @author stack
32   * @version $Date: 2006-09-20 22:40:21 +0000 (Wed, 20 Sep 2006) $ $Version$
33   */
34  class Value extends SubElement {
35  
36      private StringBuilder sb;
37      private boolean folding = false;
38  	
39      private Value() {
40          this(null);
41      }
42      
43      public Value(final String s) {
44          super(s);
45      }
46      
47      protected String baseCheck(String s) {
48          this.sb = new StringBuilder(s.length() * 2);
49          super.baseCheck(s);
50          return sb.toString();
51      }
52      
53      @Override
54      protected void checkCharacter(char c, String srcStr, int index) {
55          checkControlCharacter(c, srcStr, index);
56          // Now, rewrite the value String with folding (If CR or LF or CRLF
57          // present.
58          if (ANVLRecord.isCR(c)) {
59              this.folding = true;
60              this.sb.append(ANVLRecord.FOLD_PREFIX);
61          } else if (ANVLRecord.isLF(c)) {
62              if (!this.folding) {
63                  this.folding = true;
64                  this.sb.append(ANVLRecord.FOLD_PREFIX);
65              } else {
66                  // Previous character was a CR. Fold prefix has been added.
67              }
68          } else if (this.folding && Character.isWhitespace(c)) {
69              // Only write out one whitespace character. Skip.
70          } else {
71              this.folding = false;
72              this.sb.append(c);
73          }
74      }
75  }