1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.util.iterator;
26
27 import java.util.Iterator;
28 import java.util.logging.Logger;
29 import java.util.regex.Matcher;
30 import java.util.regex.Pattern;
31
32 /***
33 * Utility class providing an Iterator interface over line-oriented
34 * text input. By providing regexps indicating lines to ignore
35 * (such as pure whitespace or comments), lines to consider input, and
36 * what to return from the input lines (such as a whitespace-trimmed
37 * non-whitespace token with optional trailing comment), this can
38 * be configured to handle a number of formats.
39 *
40 * The public static members provide pattern configurations that will
41 * be helpful in a wide variety of contexts.
42 *
43 * @author gojomo
44 */
45 public class RegexpLineIterator
46 extends TransformingIteratorWrapper<String,String> {
47 private static final Logger logger =
48 Logger.getLogger(RegexpLineIterator.class.getName());
49
50 public static final String COMMENT_LINE = "//s*(#.*)?";
51 public static final String NONWHITESPACE_ENTRY_TRAILING_COMMENT =
52 "^[//s\ufeff]*(//S+)//s*(#.*)?$";
53 public static final String TRIMMED_ENTRY_TRAILING_COMMENT =
54 "^//s*([^#]+?)//s*(#.*)?$";
55
56 public static final String ENTRY = "$1";
57
58 protected Matcher ignoreLine = null;
59 protected Matcher extractLine = null;
60 protected String outputTemplate = null;
61
62
63 public RegexpLineIterator(Iterator<String> inner, String ignore,
64 String extract, String replace) {
65 this.inner = inner;
66 ignoreLine = Pattern.compile(ignore).matcher("");
67 extractLine = Pattern.compile(extract).matcher("");
68 outputTemplate = replace;
69 }
70
71 /***
72 * Loads next item into lookahead spot, if available. Skips
73 * lines matching ignoreLine; extracts desired portion of
74 * lines matching extractLine; informationally reports any
75 * lines matching neither.
76 *
77 * @return whether any item was loaded into next field
78 */
79 protected String transform(String line) {
80 ignoreLine.reset(line);
81 if(ignoreLine.matches()) {
82 return null;
83 }
84 extractLine.reset(line);
85 if(extractLine.matches()) {
86 StringBuffer output = new StringBuffer();
87
88
89 extractLine.appendReplacement(output,outputTemplate);
90 return output.toString();
91 }
92
93 logger.warning("line not extracted nor no-op: "+line);
94 return null;
95 }
96 }