1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 package org.archive.crawler.settings;
25
26 import java.lang.reflect.InvocationTargetException;
27 import java.text.ParseException;
28 import java.util.HashMap;
29 import java.util.Map;
30 import java.util.Stack;
31 import java.util.logging.Level;
32 import java.util.logging.Logger;
33
34 import javax.management.Attribute;
35 import javax.management.AttributeNotFoundException;
36 import javax.management.InvalidAttributeValueException;
37
38 import org.archive.crawler.settings.Constraint.FailedCheck;
39 import org.archive.crawler.settings.refinements.PortnumberCriteria;
40 import org.archive.crawler.settings.refinements.Refinement;
41 import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
42 import org.archive.crawler.settings.refinements.TimespanCriteria;
43 import org.archive.util.ArchiveUtils;
44 import org.xml.sax.Attributes;
45 import org.xml.sax.Locator;
46 import org.xml.sax.SAXException;
47 import org.xml.sax.SAXParseException;
48 import org.xml.sax.helpers.DefaultHandler;
49
/**
 * A SAX element handler that updates a CrawlerSettings object.
 *
 * This is a helper class for the XMLSettingsHandler.
 *
 * @author John Erik Halse
 */
57 public class CrawlSettingsSAXHandler extends DefaultHandler implements
58 ValueErrorHandler {
59
60 private static Logger logger = Logger
61 .getLogger("org.archive.crawler.settings.XMLSettingsHandler");
62
63 private Locator locator;
64
65 private CrawlerSettings settings;
66
67 private SettingsHandler settingsHandler;
68
69 private Map<String,ElementHandler> handlers
70 = new HashMap<String,ElementHandler>();
71
72 private Stack<ElementHandler> handlerStack = new Stack<ElementHandler>();
73
74 private Stack<Object> stack = new Stack<Object>();
75
76 /*** Keeps track of elements which subelements should be skipped. */
77 private Stack<Boolean> skip = new Stack<Boolean>();
78
79 private StringBuffer buffer = new StringBuffer();
80
81 private String value;
82
83 /***
84 * Creates a new CrawlSettingsSAXHandler.
85 *
86 * @param settings the settings object that should be updated from this
87 * handler.
88 */
89 public CrawlSettingsSAXHandler(CrawlerSettings settings) {
90 super();
91 this.settings = settings;
92 this.settingsHandler = settings.getSettingsHandler();
93 handlers.put(XMLSettingsHandler.XML_ROOT_ORDER, new RootHandler());
94 handlers.put(XMLSettingsHandler.XML_ROOT_HOST_SETTINGS,
95 new RootHandler());
96 handlers.put(XMLSettingsHandler.XML_ROOT_REFINEMENT, new RootHandler());
97 handlers.put(XMLSettingsHandler.XML_ELEMENT_CONTROLLER,
98 new ModuleHandler());
99 handlers
100 .put(XMLSettingsHandler.XML_ELEMENT_OBJECT, new ModuleHandler());
101 handlers.put(XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT,
102 new NewModuleHandler());
103 handlers.put(XMLSettingsHandler.XML_ELEMENT_META, new MetaHandler());
104 handlers.put(XMLSettingsHandler.XML_ELEMENT_NAME, new NameHandler());
105 handlers.put(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
106 new DescriptionHandler());
107 handlers.put(XMLSettingsHandler.XML_ELEMENT_OPERATOR,
108 new OperatorHandler());
109 handlers.put(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION,
110 new OrganizationHandler());
111 handlers.put(XMLSettingsHandler.XML_ELEMENT_AUDIENCE,
112 new AudienceHandler());
113 handlers.put(XMLSettingsHandler.XML_ELEMENT_DATE, new DateHandler());
114 handlers.put(SettingsHandler.MAP, new MapHandler());
115 handlers.put(SettingsHandler.INTEGER_LIST, new ListHandler());
116 handlers.put(SettingsHandler.STRING_LIST, new ListHandler());
117 handlers.put(SettingsHandler.DOUBLE_LIST, new ListHandler());
118 handlers.put(SettingsHandler.FLOAT_LIST, new ListHandler());
119 handlers.put(SettingsHandler.LONG_LIST, new ListHandler());
120 handlers.put(SettingsHandler.STRING, new SimpleElementHandler());
121 handlers.put(SettingsHandler.TEXT, new SimpleElementHandler());
122 handlers.put(SettingsHandler.INTEGER, new SimpleElementHandler());
123 handlers.put(SettingsHandler.FLOAT, new SimpleElementHandler());
124 handlers.put(SettingsHandler.LONG, new SimpleElementHandler());
125 handlers.put(SettingsHandler.BOOLEAN, new SimpleElementHandler());
126 handlers.put(SettingsHandler.DOUBLE, new SimpleElementHandler());
127
128 handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
129 new RefinementListHandler());
130 handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
131 new RefinementHandler());
132 handlers.put(XMLSettingsHandler.XML_ELEMENT_REFERENCE,
133 new ReferenceHandler());
134 handlers
135 .put(XMLSettingsHandler.XML_ELEMENT_LIMITS, new LimitsHandler());
136 handlers.put(XMLSettingsHandler.XML_ELEMENT_TIMESPAN,
137 new TimespanHandler());
138 handlers.put(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
139 new PortnumberHandler());
140 handlers.put(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
141 new URIMatcherHandler());
142 }
143
144
145
146
147
148
149 public void setDocumentLocator(Locator locator) {
150 super.setDocumentLocator(locator);
151 this.locator = locator;
152 }
153
154
155
156
157
158
159 public void startDocument() throws SAXException {
160 settingsHandler.registerValueErrorHandler(this);
161 skip.push(new Boolean(false));
162 super.startDocument();
163 }
164
165
166
167
168
169
170 public void endDocument() throws SAXException {
171 settingsHandler.unregisterValueErrorHandler(this);
172 super.endDocument();
173 }
174
175
176
177
178
179
180 public void characters(char[] ch, int start, int length)
181 throws SAXException {
182 super.characters(ch, start, length);
183 buffer.append(ch, start, length);
184 }
185
186 /***
187 * Start of an element. Decide what handler to use, and call it.
188 *
189 * @param uri
190 * @param localName
191 * @param qName
192 * @param attributes
193 * @throws SAXException
194 */
195 public void startElement(String uri, String localName, String qName,
196 Attributes attributes) throws SAXException {
197
198 ElementHandler handler = ((ElementHandler) handlers.get(qName));
199 if (handler != null) {
200 handlerStack.push(handler);
201
202 if (((Boolean) skip.peek()).booleanValue()) {
203 skip.push(new Boolean(true));
204 String moduleName = attributes
205 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
206 logger.fine("Skipping: " + qName + " " + moduleName);
207 } else {
208 try {
209 handler.startElement(qName, attributes);
210 skip.push(new Boolean(false));
211 } catch (SAXException e) {
212 if (e.getException() instanceof InvocationTargetException
213 || e.getException() instanceof AttributeNotFoundException) {
214 skip.push(new Boolean(true));
215 } else {
216 skip.push(new Boolean(false));
217 throw e;
218 }
219 }
220 }
221 } else {
222 String tmp = "Unknown element '" + qName + "' in '" +
223 locator.getSystemId() + "', line: " + locator.getLineNumber() +
224 ", column: " + locator.getColumnNumber();
225 if (this.settingsHandler.getOrder() != null &&
226 this.settingsHandler.getOrder().getController() != null) {
227 logger.log(Level.WARNING, tmp);
228 }
229 logger.warning(tmp);
230 }
231 }
232
233 /***
234 * End of an element.
235 *
236 * @param uri
237 * @param localName
238 * @param qName
239 * @throws SAXException
240 */
241 public void endElement(String uri, String localName, String qName)
242 throws SAXException {
243 value = buffer.toString().trim();
244 buffer.setLength(0);
245 ElementHandler handler = (ElementHandler) handlerStack.pop();
246 if (!((Boolean) skip.pop()).booleanValue()) {
247 if (handler != null) {
248 handler.endElement(qName);
249 }
250 }
251 }
252
253 public void illegalElementError(String name) throws SAXParseException {
254 throw new SAXParseException("Element '" + name + "' not allowed here",
255 locator);
256 }
257
258 /***
259 * Superclass of all the elementhandlers.
260 *
261 * This class should be subclassed for the different XML-elements.
262 *
263 * @author John Erik Halse
264 */
265 private class ElementHandler {
266
267 /***
268 * Start of an element
269 *
270 * @param name
271 * @param atts
272 * @throws SAXException
273 */
274 public void startElement(String name, Attributes atts)
275 throws SAXException {
276 }
277
278 /***
279 * End of an element
280 *
281 * @param name
282 * @throws SAXException
283 */
284 public void endElement(String name) throws SAXException {
285 }
286 }
287
288 /***
289 * Handle the root element.
290 *
291 * This class checks that the root element is of the right type.
292 *
293 * @author John Erik Halse
294 */
295 private class RootHandler extends ElementHandler {
296
297 public void startElement(String name, Attributes atts)
298 throws SAXException {
299
300 if ((name.equals(XMLSettingsHandler.XML_ROOT_ORDER) && settings
301 .getScope() != null)
302 || (name.equals(XMLSettingsHandler.XML_ROOT_HOST_SETTINGS) && settings
303 .getScope() == null)
304 || (name.equals(XMLSettingsHandler.XML_ROOT_REFINEMENT) && !settings
305 .isRefinement())) {
306 throw new SAXParseException("Wrong document type '" + name
307 + "'", locator);
308 }
309 }
310 }
311
312
313 private class MetaHandler extends ElementHandler {
314 }
315
316 private class NameHandler extends ElementHandler {
317
318 public void endElement(String name) throws SAXException {
319 if (handlerStack.peek() instanceof MetaHandler) {
320 settings.setName(value);
321 } else {
322 illegalElementError(name);
323 }
324 }
325 }
326
327 private class DescriptionHandler extends ElementHandler {
328
329 public void endElement(String name) throws SAXException {
330 if (handlerStack.peek() instanceof MetaHandler) {
331 settings.setDescription(value);
332 } else if (handlerStack.peek() instanceof RefinementHandler) {
333 ((Refinement) stack.peek()).setDescription(value);
334 } else {
335 illegalElementError(name);
336 }
337 }
338 }
339
340 private class OrganizationHandler extends ElementHandler {
341
342 public void endElement(String name) throws SAXException {
343 if (handlerStack.peek() instanceof MetaHandler) {
344 settings.setOrganization(value);
345 } else if (handlerStack.peek() instanceof RefinementHandler) {
346 ((Refinement) stack.peek()).setOrganization(value);
347 } else {
348 illegalElementError(name);
349 }
350 }
351 }
352
353 private class OperatorHandler extends ElementHandler {
354
355 public void endElement(String name) throws SAXException {
356 if (handlerStack.peek() instanceof MetaHandler) {
357 settings.setOperator(value);
358 } else if (handlerStack.peek() instanceof RefinementHandler) {
359 ((Refinement) stack.peek()).setOperator(value);
360 } else {
361 illegalElementError(name);
362 }
363 }
364 }
365
366 private class AudienceHandler extends ElementHandler {
367
368 public void endElement(String name) throws SAXException {
369 if (handlerStack.peek() instanceof MetaHandler) {
370 settings.setAudience(value);
371 } else if (handlerStack.peek() instanceof RefinementHandler) {
372 ((Refinement) stack.peek()).setAudience(value);
373 } else {
374 illegalElementError(name);
375 }
376 }
377 }
378
379 private class DateHandler extends ElementHandler {
380
381 public void endElement(String name) throws SAXException {
382 if (handlerStack.peek() instanceof MetaHandler) {
383 try {
384 settings.setLastSavedTime(ArchiveUtils
385 .parse14DigitDate(value));
386 } catch (ParseException e) {
387 throw new SAXException(e);
388 }
389 } else {
390 illegalElementError(name);
391 }
392 }
393 }
394
395
396 private class RefinementListHandler extends ElementHandler {
397
398 public void startElement(String name) throws SAXException {
399 if (!(handlerStack.peek() instanceof RootHandler)) {
400 illegalElementError(name);
401 }
402 }
403 }
404
405 private class RefinementHandler extends ElementHandler {
406 public void startElement(String name, Attributes atts)
407 throws SAXException {
408 stack.push(new Refinement(settings, atts
409 .getValue(XMLSettingsHandler.XML_ELEMENT_REFERENCE)));
410 }
411 }
412
413 private class ReferenceHandler extends ElementHandler {
414
415 public void endElement(String name) throws SAXException {
416 if (handlerStack.peek() instanceof RefinementHandler) {
417 ((Refinement) stack.peek()).setReference(value);
418 } else {
419 illegalElementError(name);
420 }
421 }
422 }
423
424 private class LimitsHandler extends ElementHandler {
425 }
426
427 private class TimespanHandler extends ElementHandler {
428
429 public void startElement(String name, Attributes atts)
430 throws SAXException {
431 if (stack.peek() instanceof Refinement) {
432 String from = atts
433 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_FROM);
434 String to = atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_TO);
435 try {
436 TimespanCriteria timespan = new TimespanCriteria(from, to);
437 ((Refinement) stack.peek()).addCriteria(timespan);
438 } catch (ParseException e) {
439 throw new SAXException(e);
440 }
441 } else {
442 illegalElementError(name);
443 }
444 }
445 }
446
447 private class PortnumberHandler extends ElementHandler {
448
449 public void endElement(String name) throws SAXException {
450 if (handlerStack.peek() instanceof LimitsHandler) {
451 ((Refinement) stack.peek()).addCriteria(new PortnumberCriteria(value));
452 } else {
453 illegalElementError(name);
454 }
455 }
456 }
457
458 private class URIMatcherHandler extends ElementHandler {
459
460 public void endElement(String name) throws SAXException {
461 if (handlerStack.peek() instanceof LimitsHandler) {
462 ((Refinement) stack.peek()).addCriteria(new RegularExpressionCriteria(value));
463 } else {
464 illegalElementError(name);
465 }
466 }
467 }
468
469
470
471 private class ModuleHandler extends ElementHandler {
472
473 public void startElement(String name, Attributes atts)
474 throws SAXException {
475 ModuleType module;
476 if (name.equals(XMLSettingsHandler.XML_ELEMENT_CONTROLLER)) {
477 module = settingsHandler.getOrder();
478 } else {
479 module = settingsHandler.getSettingsObject(null).getModule(
480 atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME));
481 }
482 stack.push(module);
483 }
484
485 public void endElement(String name) throws SAXException {
486 stack.pop();
487 }
488 }
489
490 private class NewModuleHandler extends ElementHandler {
491
492 public void startElement(String name, Attributes atts)
493 throws SAXException {
494 ComplexType parentModule = (ComplexType) stack.peek();
495 String moduleName = atts
496 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
497 String moduleClass = atts
498 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_CLASS);
499 try {
500 ModuleType module = SettingsHandler
501 .instantiateModuleTypeFromClassName(moduleName,
502 moduleClass);
503 try {
504 parentModule.setAttribute(settings, module);
505 } catch (AttributeNotFoundException e) {
506
507
508
509 try {
510 parentModule.addElement(settings, module);
511 } catch (IllegalStateException ise) {
512
513
514 logger.log(Level.WARNING,"Module '" + moduleName + "' in '"
515 + locator.getSystemId() + "', line: "
516 + locator.getLineNumber() + ", column: "
517 + locator.getColumnNumber()
518 + " is not defined in '"
519 + parentModule.getName() + "'.");
520 throw new SAXException(new AttributeNotFoundException(
521 ise.getMessage()));
522 }
523 }
524 stack.push(module);
525 } catch (InvocationTargetException e) {
526 logger.log(Level.WARNING,"Couldn't instantiate " + moduleName
527 + ", from class: " + moduleClass + "' in '"
528 + locator.getSystemId() + "', line: "
529 + locator.getLineNumber() + ", column: "
530 + locator.getColumnNumber(), e);
531 throw new SAXException(e);
532 } catch (InvalidAttributeValueException e) {
533 throw new SAXException(e);
534 }
535 }
536
537 public void endElement(String name) throws SAXException {
538 stack.pop();
539 }
540 }
541
542 private class MapHandler extends ElementHandler {
543
544 public void startElement(String name, Attributes atts)
545 throws SAXException {
546 String mapName = atts
547 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
548 ComplexType parentModule = (ComplexType) stack.peek();
549 try {
550 stack.push(parentModule.getAttribute(settings, mapName));
551 } catch (AttributeNotFoundException e) {
552 throw new SAXException(e);
553 }
554 }
555
556 public void endElement(String name) throws SAXException {
557 stack.pop();
558 }
559 }
560
561 private class SimpleElementHandler extends ElementHandler {
562
563 public void startElement(String name, Attributes atts)
564 throws SAXException {
565 stack.push(atts.getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME));
566 }
567
568 public void endElement(String name) throws SAXException {
569 String elementName = (String) stack.pop();
570 Object container = stack.peek();
571 if (container instanceof ComplexType) {
572 try {
573 try {
574 ((ComplexType) container).setAttribute(settings,
575 new Attribute(elementName, value));
576 } catch (AttributeNotFoundException e) {
577
578
579
580 try {
581 ((ComplexType) container).addElement(settings,
582 new SimpleType(elementName, "", value));
583 } catch (IllegalStateException ise) {
584 logger.warning("Unknown attribute '" + elementName
585 + "' in '" + locator.getSystemId()
586 + "', line: " + locator.getLineNumber()
587 + ", column: " + locator.getColumnNumber());
588 }
589 }
590 } catch (InvalidAttributeValueException e) {
591 try {
592 logger.warning("Illegal value '"
593 + value
594 + "' for attribute '"
595 + elementName
596 + "' in '"
597 + locator.getSystemId()
598 + "', line: "
599 + locator.getLineNumber()
600 + ", column: "
601 + locator.getColumnNumber()
602 + ", Value reset to default value: "
603 + ((ComplexType) container).getAttribute(
604 settings, elementName));
605 } catch (AttributeNotFoundException e1) {
606 throw new SAXException(e1);
607 }
608 }
609 } else {
610 if (container == null) {
611
612
613
614
615 logger.severe("Empty container (Was a referenced parent" +
616 " filter removed?). Element details: elementName " +
617 elementName + ", name " + name);
618 } else {
619 ((ListType) container).add(value);
620 }
621 }
622 }
623 }
624
625 private class ListHandler extends ElementHandler {
626
627 public void startElement(String name, Attributes atts)
628 throws SAXException {
629 String listName = atts
630 .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
631 ComplexType parentModule = (ComplexType) stack.peek();
632 ListType list;
633 try {
634 list = (ListType) parentModule.getAttribute(settings, listName);
635 } catch (AttributeNotFoundException e) {
636 throw new SAXException(e);
637 }
638 list.clear();
639 stack.push(list);
640 }
641
642 public void endElement(String name) throws SAXException {
643 stack.pop();
644 }
645 }
646
647
648
649
650
651
652 public void handleValueError(FailedCheck error) {
653 logger.warning(error.getMessage() + "\n Attribute: '"
654 + error.getOwner().getName() + ":"
655 + error.getDefinition().getName() + "'\n Value: '" + value
656 + "'\n File: '" + locator.getSystemId() + "', line: "
657 + locator.getLineNumber() + ", column: "
658 + locator.getColumnNumber());
659 }
660 }