1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.settings;
26
27 import java.io.IOException;
28 import java.text.ParseException;
29 import java.util.Iterator;
30
31 import javax.management.AttributeNotFoundException;
32 import javax.management.MBeanInfo;
33 import javax.xml.transform.sax.SAXSource;
34
35 import org.archive.crawler.settings.refinements.PortnumberCriteria;
36 import org.archive.crawler.settings.refinements.Refinement;
37 import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
38 import org.archive.crawler.settings.refinements.TimespanCriteria;
39 import org.archive.util.ArchiveUtils;
40 import org.xml.sax.Attributes;
41 import org.xml.sax.ContentHandler;
42 import org.xml.sax.DTDHandler;
43 import org.xml.sax.EntityResolver;
44 import org.xml.sax.ErrorHandler;
45 import org.xml.sax.InputSource;
46 import org.xml.sax.SAXException;
47 import org.xml.sax.SAXNotRecognizedException;
48 import org.xml.sax.SAXNotSupportedException;
49 import org.xml.sax.XMLReader;
50 import org.xml.sax.helpers.AttributesImpl;
51
52 /*** Class that takes a CrawlerSettings object and create SAXEvents from it.
53 *
54 * This is a helper class for XMLSettingsHandler.
55 *
56 * @author John Erik Halse
57 */
58 public class CrawlSettingsSAXSource extends SAXSource implements XMLReader {
59
60 private static final int indentAmount = 2;
61
62 private CrawlerSettings settings;
63 private ContentHandler handler;
64 private boolean orderFile = false;
65
66 /*** Constructs a new CrawlSettingsSAXSource.
67 *
68 * @param settings the settings object to create SAX events from.
69 */
70 public CrawlSettingsSAXSource(CrawlerSettings settings) {
71 super();
72 this.settings = settings;
73 if (settings.getParent() == null) {
74 orderFile = true;
75 }
76 }
77
78
79
80
81 public boolean getFeature(String name)
82 throws SAXNotRecognizedException, SAXNotSupportedException {
83 return false;
84 }
85
86
87
88
89 public void setFeature(String name, boolean value)
90 throws SAXNotRecognizedException, SAXNotSupportedException {
91
92 }
93
94
95
96
97 public Object getProperty(String name)
98 throws SAXNotRecognizedException, SAXNotSupportedException {
99 return null;
100 }
101
102
103
104
105 public void setProperty(String name, Object value)
106 throws SAXNotRecognizedException, SAXNotSupportedException {
107
108 }
109
110
111
112
113 public void setEntityResolver(EntityResolver resolver) {
114
115 }
116
117
118
119
120 public EntityResolver getEntityResolver() {
121 return null;
122 }
123
124
125
126
127 public void setDTDHandler(DTDHandler handler) {
128 }
129
130
131
132
133 public DTDHandler getDTDHandler() {
134 return null;
135 }
136
137
138
139
140 public void setContentHandler(ContentHandler handler) {
141 this.handler = handler;
142 }
143
144
145
146
147 public ContentHandler getContentHandler() {
148 return handler;
149 }
150
151
152
153
154 public void setErrorHandler(ErrorHandler handler) {
155 }
156
157
158
159
160 public ErrorHandler getErrorHandler() {
161 return null;
162 }
163
164
165 private static final String nsu = "";
166 private static final char[] indentArray =
167 "\n ".toCharArray();
168
169
170
171
172 public void parse(InputSource input) throws IOException, SAXException {
173 if (handler == null) {
174 throw new SAXException("No content handler");
175 }
176 handler.startDocument();
177 AttributesImpl atts = new AttributesImpl();
178 atts.addAttribute(
179 "http://www.w3.org/2001/XMLSchema-instance",
180 "xsi",
181 "xmlns:xsi",
182 nsu,
183 "http://www.w3.org/2001/XMLSchema-instance");
184 atts.addAttribute(
185 "http://www.w3.org/2001/XMLSchema-instance",
186 "noNamespaceSchemaLocation",
187 "xsi:noNamespaceSchemaLocation",
188 nsu,
189 XMLSettingsHandler.XML_SCHEMA);
190 String rootElement;
191 if (settings.isRefinement()) {
192 rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;
193 } else if (orderFile) {
194 rootElement = XMLSettingsHandler.XML_ROOT_ORDER;
195 } else {
196 rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;
197 }
198 handler.startElement(nsu, rootElement, rootElement, atts);
199
200 parseMetaData(1 + indentAmount);
201
202 if (settings.hasRefinements()) {
203 parseRefinements(1 + indentAmount);
204 }
205
206
207 Iterator modules = settings.topLevelModules();
208 while (modules.hasNext()) {
209 ComplexType complexType = (ComplexType) modules.next();
210 parseComplexType(complexType, 1 + indentAmount);
211 }
212
213 handler.ignorableWhitespace(indentArray, 0, 1);
214 handler.endElement(nsu, rootElement, rootElement);
215 handler.ignorableWhitespace(indentArray, 0, 1);
216 handler.endDocument();
217 }
218
219 private void parseRefinements(int indent) throws SAXException {
220 Attributes nullAtts = new AttributesImpl();
221 handler.ignorableWhitespace(indentArray, 0, indent);
222 handler.startElement(nsu,
223 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
224 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST, nullAtts);
225
226 Iterator it = settings.refinementsIterator();
227 while (it.hasNext()) {
228 Refinement refinement = (Refinement) it.next();
229 handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
230 AttributesImpl reference = new AttributesImpl();
231 reference.addAttribute(nsu,
232 XMLSettingsHandler.XML_ELEMENT_REFERENCE,
233 XMLSettingsHandler.XML_ELEMENT_REFERENCE, nsu, refinement
234 .getReference());
235 handler.startElement(nsu,
236 XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
237 XMLSettingsHandler.XML_ELEMENT_REFINEMENT, reference);
238
239 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
240 refinement.getDescription(), nullAtts, indent + 2
241 * indentAmount);
242
243 parseRefinementLimits(refinement, indent + 2 * indentAmount);
244
245 handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
246 handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
247 XMLSettingsHandler.XML_ELEMENT_REFINEMENT);
248 }
249
250 handler.ignorableWhitespace(indentArray, 0, indent);
251 handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
252 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST);
253 }
254
255 private void parseRefinementLimits(Refinement refinement, int indent)
256 throws SAXException {
257 Attributes nullAtts = new AttributesImpl();
258
259 handler.ignorableWhitespace(indentArray, 0, indent);
260 handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
261 XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);
262
263 Iterator it = refinement.criteriaIterator();
264 while (it.hasNext()) {
265 Object limit = it.next();
266 if (limit instanceof TimespanCriteria) {
267 AttributesImpl timeSpan = new AttributesImpl();
268 timeSpan.addAttribute(nsu,
269 XMLSettingsHandler.XML_ATTRIBUTE_FROM,
270 XMLSettingsHandler.XML_ATTRIBUTE_FROM, nsu,
271 ((TimespanCriteria) limit).getFrom());
272 timeSpan.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_TO,
273 XMLSettingsHandler.XML_ATTRIBUTE_TO, nsu,
274 ((TimespanCriteria) limit).getTo());
275 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_TIMESPAN, "",
276 timeSpan, indent + 2 * indentAmount);
277 } else if (limit instanceof PortnumberCriteria) {
278 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
279 ((PortnumberCriteria) limit).getPortNumber(), nullAtts,
280 indent + 2 * indentAmount);
281 } else if (limit instanceof RegularExpressionCriteria) {
282 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
283 ((RegularExpressionCriteria) limit).getRegexp(), nullAtts,
284 indent + 2 * indentAmount);
285 }
286 }
287
288 handler.ignorableWhitespace(indentArray, 0, indent);
289 handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
290 XMLSettingsHandler.XML_ELEMENT_LIMITS);
291
292 }
293
294 private void parseMetaData(int indent) throws SAXException {
295
296 Attributes nullAtts = new AttributesImpl();
297 handler.ignorableWhitespace(indentArray, 0, indent);
298 handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
299 XMLSettingsHandler.XML_ELEMENT_META, nullAtts);
300
301
302 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_NAME, settings
303 .getName(), null, indent + indentAmount);
304
305
306 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION, settings
307 .getDescription(), null, indent + indentAmount);
308
309
310 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_OPERATOR, settings
311 .getOperator(), null, indent + indentAmount);
312
313
314 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION, settings
315 .getOrganization(), null, indent + indentAmount);
316
317
318 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_AUDIENCE, settings
319 .getAudience(), null, indent + indentAmount);
320
321
322 String dateStamp = ArchiveUtils.get14DigitDate();
323 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DATE, dateStamp,
324 null, indent + indentAmount);
325 try {
326 settings.setLastSavedTime(ArchiveUtils.parse14DigitDate(dateStamp));
327 } catch (ParseException e) {
328
329
330 e.printStackTrace();
331 }
332
333 handler.ignorableWhitespace(indentArray, 0, indent);
334 handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
335 XMLSettingsHandler.XML_ELEMENT_META);
336 }
337
338 /***
339 * Create SAX events from a {@link ComplexType}.
340 *
341 * @param complexType the object to creat SAX events from.
342 * @param indent the indentation amount for prettyprinting XML.
343 * @throws SAXException is thrown if an error occurs.
344 */
345 private void parseComplexType(ComplexType complexType, int indent)
346 throws SAXException {
347 if (complexType.isTransient()) {
348 return;
349 }
350 MBeanInfo mbeanInfo = complexType.getMBeanInfo(settings);
351 String objectElement = resolveElementName(complexType);
352 AttributesImpl atts = new AttributesImpl();
353 atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
354 XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, complexType
355 .getName());
356 if (objectElement == XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT) {
357
358 atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_CLASS,
359 XMLSettingsHandler.XML_ATTRIBUTE_CLASS, nsu, mbeanInfo
360 .getClassName());
361 }
362 if (complexType.getParent() == null) {
363 atts = new AttributesImpl();
364 }
365 handler.ignorableWhitespace(indentArray, 0, indent);
366 handler.startElement(nsu, objectElement, objectElement, atts);
367 for (Iterator it = complexType.getAttributeInfoIterator(settings); it
368 .hasNext();) {
369 ModuleAttributeInfo attribute = (ModuleAttributeInfo) it.next();
370 if (!attribute.isTransient()) {
371 parseAttribute(complexType, attribute, indent);
372 }
373 }
374 handler.ignorableWhitespace(indentArray, 0, indent);
375 handler.endElement(nsu, objectElement, objectElement);
376 }
377
378 private void parseAttribute(ComplexType complexType,
379 ModuleAttributeInfo attribute, int indent) throws SAXException {
380 Object value;
381 try {
382 value = complexType
383 .getLocalAttribute(settings, attribute.getName());
384 } catch (AttributeNotFoundException e) {
385 throw new SAXException(e);
386 }
387 if (orderFile || value != null) {
388
389 if (attribute.isComplexType()) {
390
391 parseComplexType((ComplexType) value, indent + indentAmount);
392 } else {
393
394 String elementName = SettingsHandler.getTypeName(attribute
395 .getType());
396 AttributesImpl atts = new AttributesImpl();
397 atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
398 XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, attribute
399 .getName());
400 if (value == null) {
401 try {
402 value = complexType.getAttribute(attribute.getName());
403 } catch (Exception e) {
404 throw new SAXException(
405 "Internal error in settings subsystem", e);
406 }
407 }
408 if (value != null) {
409 handler.ignorableWhitespace(indentArray, 0, indent
410 + indentAmount);
411 handler.startElement(nsu, elementName, elementName, atts);
412 if (value instanceof ListType) {
413 parseListData(value, indent + indentAmount);
414 handler.ignorableWhitespace(indentArray, 0, indent
415 + indentAmount);
416 } else {
417 char valueArray[] = value.toString().toCharArray();
418 handler.characters(valueArray, 0, valueArray.length);
419 }
420 handler.endElement(nsu, elementName, elementName);
421 }
422 }
423 }
424 }
425
426 /*** Create SAX events for the content of a {@link ListType}.
427 *
428 * @param value the ListType whose content we create SAX events for.
429 * @param indent the indentation amount for prettyprinting XML.
430 * @throws SAXException is thrown if an error occurs.
431 */
432 private void parseListData(Object value, int indent) throws SAXException {
433 ListType list = (ListType) value;
434 Iterator it = list.iterator();
435 while (it.hasNext()) {
436 Object element = it.next();
437 String elementName =
438 SettingsHandler.getTypeName(element.getClass().getName());
439 writeSimpleElement(
440 elementName,
441 element.toString(),
442 null,
443 indent + indentAmount);
444 }
445 }
446
447 /*** Resolve the XML element name of a {@link ComplexType}.
448 *
449 * @param complexType the object to investigate.
450 * @return the name of the XML element.
451 */
452 private String resolveElementName(ComplexType complexType) {
453 String elementName;
454 if (complexType instanceof ModuleType) {
455 if (complexType.getParent() == null) {
456
457 elementName = XMLSettingsHandler.XML_ELEMENT_CONTROLLER;
458 } else if (
459 !orderFile
460 && complexType.globalSettings().getModule(
461 complexType.getName())
462 != null) {
463
464 elementName = XMLSettingsHandler.XML_ELEMENT_OBJECT;
465 } else {
466
467 elementName = XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT;
468 }
469 } else {
470
471 elementName =
472 SettingsHandler.getTypeName(complexType.getClass().getName());
473 }
474 return elementName;
475 }
476
477 /*** Create SAX events for a simple element.
478 *
479 * Creates all the SAX events needed for prettyprinting an XML element
480 * with a simple value and possible attributes.
481 *
482 * @param elementName the name of the XML element.
483 * @param value the value to pu inside the XML element.
484 * @param atts the attributes for the XML element.
485 * @param indent the indentation amount for prettyprinting XML.
486 * @throws SAXException is thrown if an error occurs.
487 */
488 private void writeSimpleElement(
489 String elementName,
490 String value,
491 Attributes atts,
492 int indent)
493 throws SAXException {
494 if (atts == null) {
495 atts = new AttributesImpl();
496 }
497
498 value = value == null ? "" : value;
499 handler.ignorableWhitespace(indentArray, 0, indent);
500 handler.startElement(nsu, elementName, elementName, atts);
501 handler.characters(value.toCharArray(), 0, value.length());
502 handler.endElement(nsu, elementName, elementName);
503 }
504
505
506
507
508 public void parse(String systemId) throws IOException, SAXException {
509
510 }
511
512
513
514
515 public XMLReader getXMLReader() {
516 return this;
517 }
518
519
520
521
522 public InputSource getInputSource() {
523 return new InputSource();
524 }
525 }