View Javadoc

1   /* CrawlSettingsSAXSource
2    *
3    * $Id: CrawlSettingsSAXSource.java 3292 2005-03-31 23:49:52Z stack-sf $
4    *
5    * Created on Dec 5, 2003
6    *
7    * Copyright (C) 2004 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler.settings;
26  
27  import java.io.IOException;
28  import java.text.ParseException;
29  import java.util.Iterator;
30  
31  import javax.management.AttributeNotFoundException;
32  import javax.management.MBeanInfo;
33  import javax.xml.transform.sax.SAXSource;
34  
35  import org.archive.crawler.settings.refinements.PortnumberCriteria;
36  import org.archive.crawler.settings.refinements.Refinement;
37  import org.archive.crawler.settings.refinements.RegularExpressionCriteria;
38  import org.archive.crawler.settings.refinements.TimespanCriteria;
39  import org.archive.util.ArchiveUtils;
40  import org.xml.sax.Attributes;
41  import org.xml.sax.ContentHandler;
42  import org.xml.sax.DTDHandler;
43  import org.xml.sax.EntityResolver;
44  import org.xml.sax.ErrorHandler;
45  import org.xml.sax.InputSource;
46  import org.xml.sax.SAXException;
47  import org.xml.sax.SAXNotRecognizedException;
48  import org.xml.sax.SAXNotSupportedException;
49  import org.xml.sax.XMLReader;
50  import org.xml.sax.helpers.AttributesImpl;
51  
52  /*** Class that takes a CrawlerSettings object and create SAXEvents from it.
53   *
54   * This is a helper class for XMLSettingsHandler.
55   *
56   * @author John Erik Halse
57   */
58  public class CrawlSettingsSAXSource extends SAXSource implements XMLReader {
59      // for prettyprinting XML file
60      private static final int indentAmount = 2;
61  
62      private CrawlerSettings settings;
63      private ContentHandler handler;
64      private boolean orderFile = false;
65  
66      /*** Constructs a new CrawlSettingsSAXSource.
67       *
68       * @param settings the settings object to create SAX events from.
69       */
70      public CrawlSettingsSAXSource(CrawlerSettings settings) {
71          super();
72          this.settings = settings;
73          if (settings.getParent() == null) {
74              orderFile = true;
75          }
76      }
77  
78      /* (non-Javadoc)
79       * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
80       */
81      public boolean getFeature(String name)
82          throws SAXNotRecognizedException, SAXNotSupportedException {
83          return false;
84      }
85  
86      /* (non-Javadoc)
87       * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
88       */
89      public void setFeature(String name, boolean value)
90          throws SAXNotRecognizedException, SAXNotSupportedException {
91  
92      }
93  
94      /* (non-Javadoc)
95       * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
96       */
97      public Object getProperty(String name)
98          throws SAXNotRecognizedException, SAXNotSupportedException {
99          return null;
100     }
101 
102     /* (non-Javadoc)
103      * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
104      */
105     public void setProperty(String name, Object value)
106         throws SAXNotRecognizedException, SAXNotSupportedException {
107 
108     }
109 
110     /* (non-Javadoc)
111      * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
112      */
113     public void setEntityResolver(EntityResolver resolver) {
114 
115     }
116 
117     /* (non-Javadoc)
118      * @see org.xml.sax.XMLReader#getEntityResolver()
119      */
120     public EntityResolver getEntityResolver() {
121         return null;
122     }
123 
124     /* (non-Javadoc)
125      * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
126      */
127     public void setDTDHandler(DTDHandler handler) {
128     }
129 
130     /* (non-Javadoc)
131      * @see org.xml.sax.XMLReader#getDTDHandler()
132      */
133     public DTDHandler getDTDHandler() {
134         return null;
135     }
136 
137     /* (non-Javadoc)
138      * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
139      */
140     public void setContentHandler(ContentHandler handler) {
141         this.handler = handler;
142     }
143 
144     /* (non-Javadoc)
145      * @see org.xml.sax.XMLReader#getContentHandler()
146      */
147     public ContentHandler getContentHandler() {
148         return handler;
149     }
150 
151     /* (non-Javadoc)
152      * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
153      */
154     public void setErrorHandler(ErrorHandler handler) {
155     }
156 
157     /* (non-Javadoc)
158      * @see org.xml.sax.XMLReader#getErrorHandler()
159      */
160     public ErrorHandler getErrorHandler() {
161         return null;
162     }
163 
164     // We're not doing namespaces
165     private static final String nsu = ""; // NamespaceURI
166     private static final char[] indentArray =
167         "\n                                          ".toCharArray();
168 
169     /* (non-Javadoc)
170      * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
171      */
172     public void parse(InputSource input) throws IOException, SAXException {
173         if (handler == null) {
174             throw new SAXException("No content handler");
175         }
176         handler.startDocument();
177         AttributesImpl atts = new AttributesImpl();
178         atts.addAttribute(
179             "http://www.w3.org/2001/XMLSchema-instance",
180             "xsi",
181             "xmlns:xsi",
182             nsu,
183             "http://www.w3.org/2001/XMLSchema-instance");
184         atts.addAttribute(
185             "http://www.w3.org/2001/XMLSchema-instance",
186             "noNamespaceSchemaLocation",
187             "xsi:noNamespaceSchemaLocation",
188             nsu,
189             XMLSettingsHandler.XML_SCHEMA);
190         String rootElement;
191         if (settings.isRefinement()) {
192             rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;
193         } else if (orderFile) {
194             rootElement = XMLSettingsHandler.XML_ROOT_ORDER;
195         } else {
196             rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;
197         }
198         handler.startElement(nsu, rootElement, rootElement, atts);
199 
200         parseMetaData(1 + indentAmount);
201 
202         if (settings.hasRefinements()) {
203             parseRefinements(1 + indentAmount);
204         }
205 
206         // Write the modules
207         Iterator modules = settings.topLevelModules();
208         while (modules.hasNext()) {
209             ComplexType complexType = (ComplexType) modules.next();
210             parseComplexType(complexType, 1 + indentAmount);
211         }
212 
213         handler.ignorableWhitespace(indentArray, 0, 1);
214         handler.endElement(nsu, rootElement, rootElement);
215         handler.ignorableWhitespace(indentArray, 0, 1);
216         handler.endDocument();
217     }
218 
219     private void parseRefinements(int indent) throws SAXException {
220         Attributes nullAtts = new AttributesImpl();
221         handler.ignorableWhitespace(indentArray, 0, indent);
222         handler.startElement(nsu,
223                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
224                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST, nullAtts);
225 
226         Iterator it = settings.refinementsIterator();
227         while (it.hasNext()) {
228             Refinement refinement = (Refinement) it.next();
229             handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
230             AttributesImpl reference = new AttributesImpl();
231             reference.addAttribute(nsu,
232                     XMLSettingsHandler.XML_ELEMENT_REFERENCE,
233                     XMLSettingsHandler.XML_ELEMENT_REFERENCE, nsu, refinement
234                             .getReference());
235             handler.startElement(nsu,
236                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
237                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT, reference);
238 
239             writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
240                     refinement.getDescription(), nullAtts, indent + 2
241                             * indentAmount);
242 
243             parseRefinementLimits(refinement, indent + 2 * indentAmount);
244 
245             handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);
246             handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
247                     XMLSettingsHandler.XML_ELEMENT_REFINEMENT);
248         }
249 
250         handler.ignorableWhitespace(indentArray, 0, indent);
251         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
252                 XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST);
253     }
254 
255     private void parseRefinementLimits(Refinement refinement, int indent)
256             throws SAXException {
257         Attributes nullAtts = new AttributesImpl();
258 
259         handler.ignorableWhitespace(indentArray, 0, indent);
260         handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
261                 XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);
262 
263         Iterator it = refinement.criteriaIterator();
264         while (it.hasNext()) {
265             Object limit = it.next();
266             if (limit instanceof TimespanCriteria) {
267                 AttributesImpl timeSpan = new AttributesImpl();
268                 timeSpan.addAttribute(nsu,
269                         XMLSettingsHandler.XML_ATTRIBUTE_FROM,
270                         XMLSettingsHandler.XML_ATTRIBUTE_FROM, nsu,
271                         ((TimespanCriteria) limit).getFrom());
272                 timeSpan.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_TO,
273                         XMLSettingsHandler.XML_ATTRIBUTE_TO, nsu,
274                         ((TimespanCriteria) limit).getTo());
275                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_TIMESPAN, "",
276                         timeSpan, indent + 2 * indentAmount);
277             } else if (limit instanceof PortnumberCriteria) {
278                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
279                         ((PortnumberCriteria) limit).getPortNumber(), nullAtts,
280                         indent + 2 * indentAmount);
281             } else if (limit instanceof RegularExpressionCriteria) {
282                 writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
283                         ((RegularExpressionCriteria) limit).getRegexp(), nullAtts,
284                         indent + 2 * indentAmount);
285             }
286         }
287 
288         handler.ignorableWhitespace(indentArray, 0, indent);
289         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
290                 XMLSettingsHandler.XML_ELEMENT_LIMITS);
291 
292     }
293 
294     private void parseMetaData(int indent) throws SAXException {
295         // Write meta information
296         Attributes nullAtts = new AttributesImpl();
297         handler.ignorableWhitespace(indentArray, 0, indent);
298         handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
299                 XMLSettingsHandler.XML_ELEMENT_META, nullAtts);
300 
301         // Write settings name
302         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_NAME, settings
303                 .getName(), null, indent + indentAmount);
304 
305         // Write settings description
306         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION, settings
307                 .getDescription(), null, indent + indentAmount);
308 
309         // Write settings operator
310         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_OPERATOR, settings
311                 .getOperator(), null, indent + indentAmount);
312 
313         // Write settings description
314         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION, settings
315                 .getOrganization(), null, indent + indentAmount);
316 
317         // Write settings description
318         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_AUDIENCE, settings
319                 .getAudience(), null, indent + indentAmount);
320 
321         // Write file date
322         String dateStamp = ArchiveUtils.get14DigitDate();
323         writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DATE, dateStamp,
324                 null, indent + indentAmount);
325         try {
326             settings.setLastSavedTime(ArchiveUtils.parse14DigitDate(dateStamp));
327         } catch (ParseException e) {
328             // Should never happen since we just created it. If this exception
329             // is thrown, then there is a bug in ArchiveUtils.
330             e.printStackTrace();
331         }
332 
333         handler.ignorableWhitespace(indentArray, 0, indent);
334         handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_META,
335                 XMLSettingsHandler.XML_ELEMENT_META);
336     }
337 
338     /***
339      * Create SAX events from a {@link ComplexType}.
340      *
341      * @param complexType the object to creat SAX events from.
342      * @param indent the indentation amount for prettyprinting XML.
343      * @throws SAXException is thrown if an error occurs.
344      */
345     private void parseComplexType(ComplexType complexType, int indent)
346             throws SAXException {
347         if (complexType.isTransient()) {
348             return;
349         }
350         MBeanInfo mbeanInfo = complexType.getMBeanInfo(settings);
351         String objectElement = resolveElementName(complexType);
352         AttributesImpl atts = new AttributesImpl();
353         atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
354                 XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, complexType
355                         .getName());
356         if (objectElement == XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT) {
357             // Only 'newObject' elements have a class attribute
358             atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_CLASS,
359                     XMLSettingsHandler.XML_ATTRIBUTE_CLASS, nsu, mbeanInfo
360                             .getClassName());
361         }
362         if (complexType.getParent() == null) {
363             atts = new AttributesImpl();
364         }
365         handler.ignorableWhitespace(indentArray, 0, indent);
366         handler.startElement(nsu, objectElement, objectElement, atts);
367         for (Iterator it = complexType.getAttributeInfoIterator(settings); it
368                 .hasNext();) {
369             ModuleAttributeInfo attribute = (ModuleAttributeInfo) it.next();
370             if (!attribute.isTransient()) {
371                 parseAttribute(complexType, attribute, indent);
372             }
373         }
374         handler.ignorableWhitespace(indentArray, 0, indent);
375         handler.endElement(nsu, objectElement, objectElement);
376     }
377 
378     private void parseAttribute(ComplexType complexType,
379             ModuleAttributeInfo attribute, int indent) throws SAXException {
380         Object value;
381         try {
382             value = complexType
383                     .getLocalAttribute(settings, attribute.getName());
384         } catch (AttributeNotFoundException e) {
385             throw new SAXException(e);
386         }
387         if (orderFile || value != null) {
388             // Write only overridden values unless this is the order file
389             if (attribute.isComplexType()) {
390                 // Call method recursively for complex types
391                 parseComplexType((ComplexType) value, indent + indentAmount);
392             } else {
393                 // Write element
394                 String elementName = SettingsHandler.getTypeName(attribute
395                         .getType());
396                 AttributesImpl atts = new AttributesImpl();
397                 atts.addAttribute(nsu, XMLSettingsHandler.XML_ATTRIBUTE_NAME,
398                         XMLSettingsHandler.XML_ATTRIBUTE_NAME, nsu, attribute
399                                 .getName());
400                 if (value == null) {
401                     try {
402                         value = complexType.getAttribute(attribute.getName());
403                     } catch (Exception e) {
404                         throw new SAXException(
405                                 "Internal error in settings subsystem", e);
406                     }
407                 }
408                 if (value != null) {
409                     handler.ignorableWhitespace(indentArray, 0, indent
410                             + indentAmount);
411                     handler.startElement(nsu, elementName, elementName, atts);
412                     if (value instanceof ListType) {
413                         parseListData(value, indent + indentAmount);
414                         handler.ignorableWhitespace(indentArray, 0, indent
415                                 + indentAmount);
416                     } else {
417                         char valueArray[] = value.toString().toCharArray();
418                         handler.characters(valueArray, 0, valueArray.length);
419                     }
420                     handler.endElement(nsu, elementName, elementName);
421                 }
422             }
423         }
424     }
425 
426     /*** Create SAX events for the content of a {@link ListType}.
427      *
428      * @param value the ListType whose content we create SAX events for.
429      * @param indent the indentation amount for prettyprinting XML.
430      * @throws SAXException is thrown if an error occurs.
431      */
432     private void parseListData(Object value, int indent) throws SAXException {
433         ListType list = (ListType) value;
434         Iterator it = list.iterator();
435         while (it.hasNext()) {
436             Object element = it.next();
437             String elementName =
438                 SettingsHandler.getTypeName(element.getClass().getName());
439             writeSimpleElement(
440                 elementName,
441                 element.toString(),
442                 null,
443                 indent + indentAmount);
444         }
445     }
446 
447     /*** Resolve the XML element name of a {@link ComplexType}.
448      *
449      * @param complexType the object to investigate.
450      * @return the name of the XML element.
451      */
452     private String resolveElementName(ComplexType complexType) {
453         String elementName;
454         if (complexType instanceof ModuleType) {
455             if (complexType.getParent() == null) {
456                 // Top level controller element
457                 elementName = XMLSettingsHandler.XML_ELEMENT_CONTROLLER;
458             } else if (
459                 !orderFile
460                     && complexType.globalSettings().getModule(
461                         complexType.getName())
462                         != null) {
463                 // This is not the order file and we are referencing an object
464                 elementName = XMLSettingsHandler.XML_ELEMENT_OBJECT;
465             } else {
466                 // The object is not referenced before
467                 elementName = XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT;
468             }
469         } else {
470             // It's a map
471             elementName =
472                 SettingsHandler.getTypeName(complexType.getClass().getName());
473         }
474         return elementName;
475     }
476 
477     /*** Create SAX events for a simple element.
478      *
479      * Creates all the SAX events needed for prettyprinting an XML element
480      * with a simple value and possible attributes.
481      *
482      * @param elementName the name of the XML element.
483      * @param value the value to pu inside the XML element.
484      * @param atts the attributes for the XML element.
485      * @param indent the indentation amount for prettyprinting XML.
486      * @throws SAXException is thrown if an error occurs.
487      */
488     private void writeSimpleElement(
489         String elementName,
490         String value,
491         Attributes atts,
492         int indent)
493         throws SAXException {
494         if (atts == null) {
495             atts = new AttributesImpl();
496         }
497         // make sure that the value is never null
498         value = value == null ? "" : value;
499         handler.ignorableWhitespace(indentArray, 0, indent);
500         handler.startElement(nsu, elementName, elementName, atts);
501         handler.characters(value.toCharArray(), 0, value.length());
502         handler.endElement(nsu, elementName, elementName);
503     }
504 
505     /* (non-Javadoc)
506      * @see org.xml.sax.XMLReader#parse(java.lang.String)
507      */
508     public void parse(String systemId) throws IOException, SAXException {
509         // Do nothing. Just for conformance to the XMLReader API.
510     }
511 
512     /* (non-Javadoc)
513      * @see javax.xml.transform.sax.SAXSource#getXMLReader()
514      */
515     public XMLReader getXMLReader() {
516         return this;
517     }
518 
519     /* (non-Javadoc)
520      * @see javax.xml.transform.sax.SAXSource#getInputSource()
521      */
522     public InputSource getInputSource() {
523         return new InputSource();
524     }
525 }