View Javadoc

1   /* Heritrix
2    *
3    * $Id: Heritrix.java 6871 2010-05-26 18:11:48Z nlevitt $
4    *
5    * Created on May 15, 2003
6    *
7    * Copyright (C) 2003 Internet Archive.
8    *
9    * This file is part of the Heritrix web crawler (crawler.archive.org).
10   *
11   * Heritrix is free software; you can redistribute it and/or modify
12   * it under the terms of the GNU Lesser Public License as published by
13   * the Free Software Foundation; either version 2.1 of the License, or
14   * any later version.
15   *
16   * Heritrix is distributed in the hope that it will be useful,
17   * but WITHOUT ANY WARRANTY; without even the implied warranty of
18   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19   * GNU Lesser Public License for more details.
20   *
21   * You should have received a copy of the GNU Lesser Public License
22   * along with Heritrix; if not, write to the Free Software
23   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
24   */
25  package org.archive.crawler;
26  
27  import java.io.File;
28  import java.io.FileInputStream;
29  import java.io.FileNotFoundException;
30  import java.io.FileOutputStream;
31  import java.io.FilenameFilter;
32  import java.io.IOException;
33  import java.io.InputStream;
34  import java.io.PrintStream;
35  import java.io.PrintWriter;
36  import java.net.HttpURLConnection;
37  import java.net.InetAddress;
38  import java.net.URL;
39  import java.net.URLConnection;
40  import java.net.UnknownHostException;
41  import java.util.ArrayList;
42  import java.util.Arrays;
43  import java.util.Collection;
44  import java.util.Collections;
45  import java.util.Enumeration;
46  import java.util.Hashtable;
47  import java.util.Iterator;
48  import java.util.List;
49  import java.util.Map;
50  import java.util.Properties;
51  import java.util.StringTokenizer;
52  import java.util.TimeZone;
53  import java.util.Vector;
54  import java.util.logging.Level;
55  import java.util.logging.LogManager;
56  import java.util.logging.Logger;
57  
58  import javax.management.Attribute;
59  import javax.management.AttributeList;
60  import javax.management.AttributeNotFoundException;
61  import javax.management.DynamicMBean;
62  import javax.management.InstanceAlreadyExistsException;
63  import javax.management.InstanceNotFoundException;
64  import javax.management.InvalidAttributeValueException;
65  import javax.management.MBeanInfo;
66  import javax.management.MBeanNotificationInfo;
67  import javax.management.MBeanOperationInfo;
68  import javax.management.MBeanRegistration;
69  import javax.management.MBeanRegistrationException;
70  import javax.management.MBeanServer;
71  import javax.management.MBeanServerFactory;
72  import javax.management.MalformedObjectNameException;
73  import javax.management.NotCompliantMBeanException;
74  import javax.management.ObjectName;
75  import javax.management.ReflectionException;
76  import javax.management.RuntimeOperationsException;
77  import javax.management.openmbean.CompositeData;
78  import javax.management.openmbean.CompositeDataSupport;
79  import javax.management.openmbean.CompositeType;
80  import javax.management.openmbean.OpenDataException;
81  import javax.management.openmbean.OpenMBeanAttributeInfoSupport;
82  import javax.management.openmbean.OpenMBeanConstructorInfoSupport;
83  import javax.management.openmbean.OpenMBeanInfoSupport;
84  import javax.management.openmbean.OpenMBeanOperationInfoSupport;
85  import javax.management.openmbean.OpenMBeanParameterInfo;
86  import javax.management.openmbean.OpenMBeanParameterInfoSupport;
87  import javax.management.openmbean.OpenType;
88  import javax.management.openmbean.SimpleType;
89  import javax.management.openmbean.TabularData;
90  import javax.management.openmbean.TabularDataSupport;
91  import javax.management.openmbean.TabularType;
92  import javax.naming.CompoundName;
93  import javax.naming.Context;
94  import javax.naming.NameNotFoundException;
95  import javax.naming.NamingException;
96  import javax.naming.NoInitialContextException;
97  
98  import org.apache.commons.cli.Option;
99  import org.archive.crawler.admin.CrawlJob;
100 import org.archive.crawler.admin.CrawlJobErrorHandler;
101 import org.archive.crawler.admin.CrawlJobHandler;
102 import org.archive.crawler.datamodel.CredentialStore;
103 import org.archive.crawler.datamodel.credential.Credential;
104 import org.archive.crawler.event.CrawlStatusListener;
105 import org.archive.crawler.framework.AlertManager;
106 import org.archive.crawler.framework.CrawlController;
107 import org.archive.crawler.framework.exceptions.FatalConfigurationException;
108 import org.archive.crawler.framework.exceptions.InitializationException;
109 import org.archive.crawler.selftest.SelfTestCrawlJobHandler;
110 import org.archive.crawler.settings.XMLSettingsHandler;
111 import org.archive.io.SinkHandler;
112 import org.archive.io.SinkHandlerLogRecord;
113 import org.archive.net.UURI;
114 import org.archive.util.FileUtils;
115 import org.archive.util.IoUtils;
116 import org.archive.util.JmxUtils;
117 import org.archive.util.JndiUtils;
118 import org.archive.util.PropertyUtils;
119 import org.archive.util.TextUtils;
120 
121 import sun.net.www.protocol.file.FileURLConnection;
122 
123 
124 /***
125  * Main class for Heritrix crawler. 
126  * 
127  * Heritrix is usually launched by a shell script that backgrounds heritrix
128  * and redirects all stdout and stderr emitted by heritrix to a log file.  So
129  * that startup messages emitted subsequent to the redirection of stdout and
130  * stderr show on the console, this class prints usage or startup output
131  * such as where the web UI can be found, etc., to a STARTLOG that the shell
132  * script is waiting on.  As soon as the shell script sees output in this file,
133  * it prints its content and breaks out of its wait.
134  * See ${HERITRIX_HOME}/bin/heritrix.
135  * 
136  * <p>Heritrix can also be embedded or launched by webapp initialization or
137  * by JMX bootstrapping.  So far I count 4 methods of instantiation:
138  * <ol>
139  * <li>From this class's main -- the method usually used;</li>
140  * <li>From the Heritrix UI (The local-instances.jsp) page;</li>
141  * <li>A creation by a JMX agent at the behest of a remote JMX client; and</li>
142  * <li>A container such as tomcat or jboss.</li>
143  * </ol>
144  *
145  * @author gojomo
146  * @author Kristinn Sigurdsson
147  * @author Stack
148  */
149 public class Heritrix implements DynamicMBean, MBeanRegistration {
150     /***
151      * Heritrix logging instance.
152      */
153     private static final Logger logger =
154         Logger.getLogger(Heritrix.class.getName());
155     
156     public static final File TMPDIR =
157         new File(System.getProperty("java.io.tmpdir", "/tmp"));
158 
159     /***
160      * Name of the heritrix properties file.
161      */
162     public static final String PROPERTIES = "heritrix.properties";
163 
164     /***
165      * Name of the key to use specifying alternate heritrix properties on
166      * command line.
167      */
168     public static final String PROPERTIES_KEY = PROPERTIES;
169     
170     /***
171      * Prefix used on our properties we'll add to the System.properties list.
172      */
173     public static final String HERITRIX_PROPERTIES_PREFIX = "heritrix.";
174 
175     /***
176      * Prefix used on other properties we'll add to the System.properties 
177      * list (after stripping this prefix). 
178      */
179     public static final String SYSTEM_PREFIX = "system.";
180 
181     /***
182      * Instance of web server if one was started.
183      */
184     private static SimpleHttpServer httpServer = null;
185 
186     /***
187      * CrawlJob handler. Manages multiple crawl jobs at runtime.
188      */
189     private CrawlJobHandler jobHandler = null;
190 
191     /***
192      * Heritrix start log file.
193      *
194      * This file contains standard out produced by this main class for startup
195      * only.  Used by heritrix shell script.  Name here MUST match that in the
196      * <code>bin/heritrix</code> shell script.  This is a DEPENDENCY the shell
197      * wrapper has on this here java heritrix.
198      */
199     public static final String STARTLOG = "heritrix_dmesg.log";
200 
201     /***
202      * Default encoding.
203      * 
204      * Used for content when fetching if none specified.
205      */
206 	public static final String DEFAULT_ENCODING = "ISO-8859-1";
207 
208     /***
209      * Heritrix stderr/stdout log file.
210      *
211      * This file should have nothing in it except messages over which we have
212      * no control (JVM stacktrace, 3rd-party lib emissions).  The wrapper
213      * startup script directs stderr/stdout here. This is an INTERDEPENDENCY
214      * this program has with the wrapper shell script.  Shell can actually
215      * pass us an alternate to use for this file.
216      */
217     public static String DEFAULT_HERITRIX_OUT = "heritrix_out.log";
218 
219     /***
220      * Where to write this class's startup output.
221      * 
222      * This out should only be used if Heritrix is being run from the
223      * command-line.
224      */
225     private static PrintWriter out = null;
226 
227     /***
228      * The org.archive package
229      */
230     public static final String ARCHIVE_PACKAGE = "org.archive.";
231 
232     /***
233      * The crawler package.
234      */
235 	public static final String CRAWLER_PACKAGE = Heritrix.class.getName().
236         substring(0, Heritrix.class.getName().lastIndexOf('.'));
237     
238     /***
239      * The root context for a webapp.
240      */
241     public static final String ROOT_CONTEXT = "/";
242 
243     /***
244      * Set to true if application is started from command line.
245      */
246     public static boolean commandLine = false;
247     
248     /***
249      * True if container initialization has been run.
250      */
251     private static boolean containerInitialized = false;
252     
253     /***
254      * True if properties have been loaded.
255      */
256     private static boolean propertiesLoaded = false;
257     
258     public static final String JAR_SUFFIX = ".jar";
259     
260     private AlertManager alertManager;
261 
262     /***
263      * The context of the GUI webapp.  Default is root.
264      */
265     private static String adminContext = ROOT_CONTEXT;
266     
267     /***
268      * True if we're to put up a GUI.
269      * Cmdline processing can override.
270      */
271     public static boolean gui =
272         !PropertyUtils.getBooleanProperty("heritrix.cmdline.nowui");
273     
274     /***
275      * Port to put the GUI up on.
276      * Cmdline processing can override.
277      */
278     public static int guiPort = SimpleHttpServer.DEFAULT_PORT;
279 
280     
281     /***
282      * A collection containing only localhost.  Used as default value
283      * for guiHosts, and passed to SimpleHttpServer when doing selftest.
284      */
285     final private static Collection<String> LOCALHOST_ONLY =
286      Collections.unmodifiableList(Arrays.asList(new String[] { "127.0.0.1" }));
287 
288     
289     /***
290      * Hosts to bind the GUI webserver to.
291      * By default, only contains localhost.
292      * Set to an empty collection to indicate that all available network
293      * interfaces should be used for the webserver.
294      */
295     public static Collection<String> guiHosts = LOCALHOST_ONLY;
296     
297     
298     /***
299      * Web UI server, realm, context name.
300      */
301     public static String ADMIN = "admin";
302     
303     // OpenMBean support.
304     /***
305      * The MBean server we're registered with (May be null).
306      */
307     private MBeanServer mbeanServer = null;
308     
309     /***
310      * MBean name we were registered as.
311      */
312     private ObjectName mbeanName = null;
313     
314     /***
315      * Keep reference to all instances of Heritrix.
316      * Used by the UI to figure which of the local Heritrice it should
317      * be going against and to figure what to shutdown on the way out (If
318      * there was always a JMX Agent, we wouldn't need to keep this list.  We
319      * could always ask the JMX Agent for all instances. UPDATE: True we could
320      * always ask the JMX Agent but we might keep around this local reference
321      * because it will allow faster, less awkward -- think of marshalling the args
322      * for JMX invoke operation -- access to local Heritrix instances.  A new
323      * usage for this instances Map is in CrawlJob#preRegister to find the hosting
324      * Heritrix instance).
325      */
326     private static Map<String,Heritrix> instances
327      = new Hashtable<String,Heritrix>();
328     
329     private OpenMBeanInfoSupport openMBeanInfo;
330     public static final String STATUS_ATTR = "Status";
331     public static final String VERSION_ATTR = "Version";
332     public static final String ISRUNNING_ATTR = "IsRunning";
333     public static final String ISCRAWLING_ATTR = "IsCrawling";
334     public static final String ALERTCOUNT_ATTR = "AlertCount";
335     public static final String NEWALERTCOUNT_ATTR = "NewAlertCount";
336     public static final String CURRENTJOB_ATTR = "CurrentJob";
337     public static final List ATTRIBUTE_LIST;
338     static {
339         ATTRIBUTE_LIST = Arrays.asList(new String [] {STATUS_ATTR,
340             VERSION_ATTR, ISRUNNING_ATTR, ISCRAWLING_ATTR,
341             ALERTCOUNT_ATTR, NEWALERTCOUNT_ATTR, CURRENTJOB_ATTR});
342     }
343     
344     public static final String START_OPER = "start";
345     public static final String STOP_OPER = "stop";
346     public static final String DESTROY_OPER = "destroy";
347     public static final String INTERRUPT_OPER = "interrupt";
348     public static final String START_CRAWLING_OPER = "startCrawling";
349     public static final String STOP_CRAWLING_OPER = "stopCrawling";
350     public static final String ADD_CRAWL_JOB_OPER = "addJob";
351     public static final String TERMINATE_CRAWL_JOB_OPER =
352         "terminateCurrentJob";
353     public static final String DELETE_CRAWL_JOB_OPER = "deleteJob";
354     public static final String ALERT_OPER = "alert";
355     public static final String ADD_CRAWL_JOB_BASEDON_OPER = "addJobBasedon";
356     public static final String PENDING_JOBS_OPER = "pendingJobs";
357     public static final String COMPLETED_JOBS_OPER = "completedJobs";
358     public static final String CRAWLEND_REPORT_OPER = "crawlendReport";
359     public static final String SHUTDOWN_OPER = "shutdown";
360     public static final String LOG_OPER = "log";
361     public static final String REBIND_JNDI_OPER = "rebindJNDI";
362     public static final List OPERATION_LIST;
363     static {
364         OPERATION_LIST = Arrays.asList(new String [] {START_OPER, STOP_OPER,
365             INTERRUPT_OPER, START_CRAWLING_OPER, STOP_CRAWLING_OPER,
366             ADD_CRAWL_JOB_OPER, ADD_CRAWL_JOB_BASEDON_OPER,
367             DELETE_CRAWL_JOB_OPER, ALERT_OPER, PENDING_JOBS_OPER,
368             COMPLETED_JOBS_OPER, CRAWLEND_REPORT_OPER, SHUTDOWN_OPER,
369             LOG_OPER, DESTROY_OPER, TERMINATE_CRAWL_JOB_OPER,
370             REBIND_JNDI_OPER});
371     }
372     private CompositeType jobCompositeType = null;
373     private TabularType jobsTabularType = null;
374     public static final String [] JOB_KEYS =
375         new String [] {"uid", "name", "status"};
376 
377     private static String adminUsername;
378 
379     private static String adminPassword;
380     
381     /***
382      * Constructor.
383      * Does not register the created instance with JMX.  Assumed this
384      * constructor is used by such as JMX agent creating an instance of
385      * Heritrix at the command of a remote client (In this case Heritrix will
386      * be registered by the invoking agent).
387      * @throws IOException
388      */
389     public Heritrix() throws IOException {
390         this(null, false);
391     }
392     
393     public Heritrix(final boolean jmxregister) throws IOException {
394         this(null, jmxregister);
395     }
396     
397     /***
398      * Constructor.
399      * @param name If null, we bring up the default Heritrix instance.
400      * @param jmxregister True if we are to register this instance with JMX
401      * agent.
402      * @throws IOException
403      */
404     public Heritrix(final String name, final boolean jmxregister)
405     throws IOException {
406         this(name, jmxregister, new CrawlJobHandler(getJobsdir()));
407     }
408     
409     /***
410      * Constructor.
411      * @param name If null, we bring up the default Heritrix instance.
412      * @param jmxregister True if we are to register this instance with JMX
413      * agent.
414      * @param cjh CrawlJobHandler to use.
415      * @throws IOException
416      */
417     public Heritrix(final String name, final boolean jmxregister,
418             final CrawlJobHandler cjh)
419     throws IOException {
420         super();
421         containerInitialization();
422         this.jobHandler = cjh;
423         this.openMBeanInfo = buildMBeanInfo();
424         // Set up the alerting system.  SinkHandler is also a global so will
425         // catch alerts for all running Heritrix instances.  Will need to
426         // address (Add name of instance that threw the alert to SinkRecord?).
427         final SinkHandler sinkHandler = SinkHandler.getInstance();
428         if (sinkHandler == null) {
429             throw new NullPointerException("SinkHandler not found.");
430         }
431         // Adapt the alerting system to use SinkHandler.
432         this.alertManager = new AlertManager() {
433             public void add(SinkHandlerLogRecord record) {
434                 sinkHandler.publish(record);
435             }
436 
437             public Vector getAll() {
438                 return sinkHandler.getAll();
439             }
440 
441             public Vector getNewAll() {
442                 return sinkHandler.getAllUnread();
443             }
444 
445             public SinkHandlerLogRecord get(String alertID) {
446                 return sinkHandler.get(Long.parseLong(alertID));
447             }
448             
449             public int getCount() {
450                 return sinkHandler.getCount();
451             }
452 
453             public int getNewCount() {
454                 return sinkHandler.getUnreadCount();
455             }
456 
457             public void remove(String alertID) {
458                 sinkHandler.remove(Long.parseLong(alertID));
459             }
460 
461             public void read(String alertID) {
462                 sinkHandler.read(Long.parseLong(alertID));
463             }
464         };
465         
466         try {
467             Heritrix.registerHeritrix(this, name, jmxregister);
468         } catch (InstanceAlreadyExistsException e) {
469             throw new RuntimeException(e);
470         } catch (MBeanRegistrationException e) {
471             throw new RuntimeException(e);
472         } catch (NotCompliantMBeanException e) {
473             throw new RuntimeException(e);
474         } catch (MalformedObjectNameException e) {
475             throw new RuntimeException(e);
476         }
477     }
478     
479     /***
480      * Run setup tasks for this 'container'. Idempotent.
481      * 
482      * @throws IOException
483      */
484     protected static void containerInitialization() throws IOException {
485         if (Heritrix.containerInitialized) {
486             return;
487         }
488         Heritrix.containerInitialized = true;
489         // Load up the properties.  This invocation adds heritrix properties
490         // to system properties so all available via System.getProperty.
491         // Note, loadProperties and patchLogging have global effects.  May be an
492         // issue if we're running inside a container such as tomcat or jboss.
493         Heritrix.loadProperties();
494         Heritrix.patchLogging();
495         Heritrix.configureTrustStore();
496         // Will run on SIGTERM but not on SIGKILL, unfortunately.
497         // Otherwise, ensures we cleanup after ourselves (Deregister from
498         // JMX and JNDI).
499         Runtime.getRuntime().addShutdownHook(
500             Heritrix.getShutdownThread(false, 0, "Heritrix shutdown hook"));
501         // Register this heritrix 'container' though we may be inside another
502         // tomcat or jboss container.
503         try {
504             registerContainerJndi();
505         } catch (Exception e) {
506             logger.log(Level.WARNING, "Failed jndi container registration.", e);
507         }
508     }
509     
510     /***
511      * Do inverse of construction. Used by anyone who does a 'new Heritrix' when
512      * they want to cleanup the instance.
513      * Of note, there may be Heritrix threads still hanging around after the
514      * call to destroy completes.  They'll eventually go down after they've
515      * finished their cleanup routines.  In particular, if you are watching
516      * Heritrix via JMX, you can see the Heritrix instance JMX bean unregister
517      * ahead of the CrawlJob JMX bean that it's hosting.
518      */
519     public void destroy() {
520         stop();
521         try {
522             Heritrix.unregisterHeritrix(this);
523         } catch (InstanceNotFoundException e) {
524             e.printStackTrace();
525         } catch (MBeanRegistrationException e) {
526             e.printStackTrace();
527         } catch (NullPointerException e) {
528             e.printStackTrace();
529         }
530         this.jobHandler = null;
531         this.openMBeanInfo = null;
532     }
533     
534     /***
535      * Launch program.
536      * Optionally will launch a web server to host UI.  Will also register
537      * Heritrix MBean with first found JMX Agent (Usually the 1.5.0 JVM
538      * Agent).
539      * 
540      * @param args Command line arguments.
541      * @throws Exception
542      */
543     public static void main(String[] args)
544     throws Exception {
545         Heritrix.commandLine = true;
546         
547         // Set timezone here.  Would be problematic doing it if we're running
548         // inside in a container.
549         TimeZone.setDefault(TimeZone.getTimeZone("GMT"));
550         
551         File startLog = new File(getHeritrixHome(), STARTLOG);
552         Heritrix.out = new PrintWriter(isDevelopment()? 
553             System.out: new PrintStream(new FileOutputStream(startLog)));
554         
555         try {
556             containerInitialization();
557             String status = doCmdLineArgs(args);
558             if (status != null) {
559                 Heritrix.out.println(status);
560             }
561         }
562 
563         catch(Exception e) {
564             // Show any exceptions in STARTLOG.
565             e.printStackTrace(Heritrix.out);
566             throw e;
567         }
568 
569         finally {
570             // If not development, close the file that signals the wrapper
571             // script that we've started.  Otherwise, just flush it; if in
572             // development, the output is probably a console.
573             if (!isDevelopment()) {
574                 if (Heritrix.out != null) {
575                     Heritrix.out.close();
576                 }
577                 System.out.println("Heritrix version: " +
578                         Heritrix.getVersion());
579             } else {
580                 if (Heritrix.out != null) {
581                     Heritrix.out.flush();
582                 }
583             }
584         }
585     }
586     
587     protected static String doCmdLineArgs(final String [] args)
588     throws Exception {
589         // Get defaults for commandline arguments from the properties file.
590         String tmpStr = PropertyUtils.
591             getPropertyOrNull("heritrix.context");
592         if (tmpStr != null)  {
593             Heritrix.adminContext = tmpStr;
594         }
595         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.port");
596         if (tmpStr != null) {
597             Heritrix.guiPort = Integer.parseInt(tmpStr);
598         }
599         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.admin");
600         String adminLoginPassword = (tmpStr == null)? "": tmpStr;
601         String crawlOrderFile =
602             PropertyUtils.getPropertyOrNull("heritrix.cmdline.order");
603         tmpStr = PropertyUtils.getPropertyOrNull("heritrix.cmdline.run");
604         boolean runMode =
605             PropertyUtils.getBooleanProperty("heritrix.cmdline.run");
606         boolean selfTest = false;
607         String selfTestName = null;
608         CommandLineParser clp = new CommandLineParser(args, Heritrix.out,
609             Heritrix.getVersion());
610         List arguments = clp.getCommandLineArguments();
611         Option [] options = clp.getCommandLineOptions();
612 
613         // Check passed argument.  Only one argument, the ORDER_FILE is allowed.
614         // If one argument, make sure exists and xml suffix.
615         if (arguments.size() > 1) {
616             clp.usage(1);
617         } else if (arguments.size() == 1) {
618             crawlOrderFile = (String)arguments.get(0);
619             if (!(new File(crawlOrderFile).exists())) {
620                 clp.usage("ORDER.XML <" + crawlOrderFile +
621                     "> specified does not exist.", 1);
622             }
623             // Must end with '.xml'
624             if (crawlOrderFile.length() > 4 &&
625                     !crawlOrderFile.substring(crawlOrderFile.length() - 4).
626                         equalsIgnoreCase(".xml")) {
627                 clp.usage("ORDER.XML <" + crawlOrderFile +
628                     "> does not have required '.xml' suffix.", 1);
629             }
630         }
631 
632         // Now look at options passed.
633         for (int i = 0; i < options.length; i++) {
634             switch(options[i].getId()) {
635                 case 'h':
636                     clp.usage();
637                     break;
638 
639                 case 'a':
640                     adminLoginPassword = options[i].getValue();
641                     break;
642 
643                 case 'n':
644                     if (crawlOrderFile == null) {
645                         clp.usage("You must specify an ORDER_FILE with" +
646                             " '--nowui' option.", 1);
647                     }
648                     Heritrix.gui = false;
649                     break;
650                 
651                 case 'b':
652                     Heritrix.guiHosts = parseHosts(options[i].getValue());
653                     break;
654 
655                 case 'p':
656                     try {
657                         Heritrix.guiPort =
658                             Integer.parseInt(options[i].getValue());
659                     } catch (NumberFormatException e) {
660                         clp.usage("Failed parse of port number: " +
661                             options[i].getValue(), 1);
662                     }
663                     if (Heritrix.guiPort <= 0) {
664                         clp.usage("Nonsensical port number: " +
665                             options[i].getValue(), 1);
666                     }
667                     break;
668 
669                 case 'r':
670                     runMode = true;
671                     break;
672 
673                 case 's':
674                     selfTestName = options[i].getValue();
675                     selfTest = true;
676                     break;
677 
678                 default:
679                     assert false: options[i].getId();
680             }
681         }
682 
683         // Ok, we should now have everything to launch the program.
684         String status = null;
685         if (selfTest) {
686             // If more than just '--selftest' and '--port' passed, then
687             // there is confusion on what is being asked of us.  Print usage
688             // rather than proceed.
689             for (int i = 0; i < options.length; i++) {
690                 if (options[i].getId() != 'p' && options[i].getId() != 's') {
691                     clp.usage(1);
692                 }
693             }
694 
695             if (arguments.size() > 0) {
696                 // No arguments accepted by selftest.
697                 clp.usage(1);
698             }
699             status = selftest(selfTestName, Heritrix.guiPort);
700         } else {
701 			if (!Heritrix.gui) {
702 				if (options.length > 1) {
703 					// If more than just '--nowui' passed, then there is
704 					// confusion on what is being asked of us. Print usage
705 					// rather than proceed.
706 					clp.usage(1);
707 				}
708 				Heritrix h = new Heritrix(true);
709 				status = h.doOneCrawl(crawlOrderFile);
710 			} else {
711                 if (!isValidLoginPasswordString(adminLoginPassword)) {
712                     // exit printing usage info if no webui login:password given
713                     clp.usage("Invalid admin login:password value, or none "
714                             + "specified. ", 1);
715                 }
716 				status = startEmbeddedWebserver(
717                         Heritrix.guiHosts, Heritrix.guiPort,
718 						adminLoginPassword);
719 				Heritrix h = new Heritrix(true);
720 
721 				String tmp = h.launch(crawlOrderFile, runMode);
722 				if (tmp != null) {
723 					status += ('\n' + tmp);
724 				}
725 			}
726 		}
727         return status;
728     }
729     
730     /***
731 	 * @return The file we dump stdout and stderr into.
732 	 */
733     public static String getHeritrixOut() {
734         String tmp = System.getProperty("heritrix.out");
735         if (tmp == null || tmp.length() == 0) {
736             tmp = Heritrix.DEFAULT_HERITRIX_OUT;
737         }
738         return tmp;
739     }
740 
741     /***
742      * Exploit <code>-Dheritrix.home</code> if available to us.
743      * Is current working dir if no heritrix.home property supplied.
744      * @return Heritrix home directory.
745      * @throws IOException
746      */
747     protected static File getHeritrixHome()
748     throws IOException {
749         File heritrixHome = null;
750         String home = System.getProperty("heritrix.home");
751         if (home != null && home.length() > 0) {
752             heritrixHome = new File(home);
753             if (!heritrixHome.exists()) {
754                 throw new IOException("HERITRIX_HOME <" + home +
755                     "> does not exist.");
756             }
757         } else {
758             heritrixHome = new File(new File("").getAbsolutePath());
759         }
760         return heritrixHome;
761     }
762     
763     /***
764      * @return The directory into which we put jobs.  If the system property
765      * 'heritrix.jobsdir' is set, we will use its value in place of the default
766      * 'jobs' directory in the current working directory.
767      * @throws IOException
768      */
769     public static File getJobsdir() throws IOException {
770         Heritrix.loadProperties(); // if called in constructor
771         String jobsdirStr = System.getProperty("heritrix.jobsdir", "jobs");
772         File jobsdir = new File(jobsdirStr);
773         return (jobsdir.isAbsolute())?
774             jobsdir:
775             new File(getHeritrixHome(), jobsdirStr);
776     }
777     
778     /***
779      * Get and check for existence of expected subdir.
780      *
781      * If development flag set, then look for dir under src dir.
782      *
783      * @param subdirName Dir to look for.
784      * @return The extant subdir.  Otherwise null if we're running
785      * in a webapp context where there is no conf directory available.
786      * @throws IOException if unable to find expected subdir.
787      */
788     protected static File getSubDir(String subdirName)
789     throws IOException {
790         return getSubDir(subdirName, true);
791     }
792     
793     /***
794      * Get and optionally check for existence of subdir.
795      *
796      * If development flag set, then look for dir under src dir.
797      *
798      * @param subdirName Dir to look for.
799      * @param fail True if we are to fail if directory does not
800      * exist; false if we are to return false if the directory does not exist.
801      * @return The extant subdir.  Otherwise null if we're running
802      * in a webapp context where there is no subdir directory available.
803      * @throws IOException if unable to find expected subdir.
804      */
805     protected static File getSubDir(String subdirName, boolean fail)
806     throws IOException {
807         String path = isDevelopment()?
808             "src" + File.separator + subdirName:
809             subdirName;
810         File dir = new File(getHeritrixHome(), path);
811         if (!dir.exists()) {
812             if (fail) {
813                 throw new IOException("Cannot find subdir: " + subdirName);
814             }
815             dir = null;
816         }
817         return dir;
818     }
819     
820     /***
821      * Test string is valid login/password string.
822      *
823      * A valid login/password string has the login and password compounded
824      * w/ a ':' delimiter.
825      *
826      * @param str String to test.
827      * @return True if valid password/login string.
828      */
829     protected static boolean isValidLoginPasswordString(String str) {
830         boolean isValid = false;
831         StringTokenizer tokenizer = new StringTokenizer(str,  ":");
832         if (tokenizer.countTokens() == 2) {
833             String login = ((String)tokenizer.nextElement()).trim();
834             String password = ((String)tokenizer.nextElement()).trim();
835             if (login.length() > 0 && password.length() > 0) {
836                 isValid = true;
837             }
838         }
839         return isValid;
840     }
841 
842     protected static boolean isDevelopment() {
843         return System.getProperty("heritrix.development") != null;
844     }
845 
846     /***
847      * Load the heritrix.properties file.
848      * 
849      * Adds any property that starts with
850      * <code>HERITRIX_PROPERTIES_PREFIX</code>
851      * or <code>ARCHIVE_PACKAGE</code>
852      * into system properties (except logging '.level' directives).
853      * @return Loaded properties.
854      * @throws IOException
855      */
856     protected static Properties loadProperties()
857     throws IOException {
858         if (Heritrix.propertiesLoaded) {
859             return System.getProperties();
860         }
861         Heritrix.propertiesLoaded = true;
862             
863         Properties properties = new Properties();
864         properties.load(getPropertiesInputStream());
865         
866         // Any property that begins with ARCHIVE_PACKAGE, make it
867         // into a system property. While iterating, check to see if anything
868         // defined on command-line, and if so, it overrules whats in
869         // heritrix.properties.
870         for (Enumeration e = properties.keys(); e.hasMoreElements();) {
871             String key = ((String)e.nextElement()).trim();
872         	if (key.startsWith(ARCHIVE_PACKAGE) ||
873                     key.startsWith(HERITRIX_PROPERTIES_PREFIX)) {
874                 // Don't add the heritrix.properties entries that are
875                 // changing the logging level of particular classes.
876                 String value = properties.getProperty(key).trim();
877                 if (key.indexOf(".level") < 0) {
878                     copyToSystemProperty(key, value);
879                 }
880             }  else if (key.startsWith(SYSTEM_PREFIX)) {
881                 String value = properties.getProperty(key).trim();
882                 copyToSystemProperty(key.substring(SYSTEM_PREFIX.length()), value); 
883             }
884         }
885         return properties;
886     }
887 
888     /***
889      * Copy the given key-value into System properties, as long as there
890      * is no existing value. 
891      * @param key property key 
892      * @param value property value
893      */
894     protected static void copyToSystemProperty(String key, String value) {
895         if (System.getProperty(key) == null ||
896             System.getProperty(key).length() == 0) {
897             System.setProperty(key, value);
898         }
899     }
900 
901     protected static InputStream getPropertiesInputStream()
902     throws IOException {
903         File file = null;
904         // Look to see if properties have been passed on the cmd-line.
905         String alternateProperties = System.getProperty(PROPERTIES_KEY);
906         if (alternateProperties != null && alternateProperties.length() > 0) {
907             file = new File(alternateProperties);
908         }
909         // Get properties from conf directory if one available.
910         if ((file == null || !file.exists()) && getConfdir(false) != null) {
911             file = new File(getConfdir(), PROPERTIES);
912             if (!file.exists()) {
913                 // If no properties file in the conf dir, set file back to
914                 // null so we go looking for heritrix.properties on classpath.
915                 file = null;
916             }
917         }
918         // If not on the command-line, there is no conf dir. Then get the
919         // properties from the CLASSPATH (Classpath file separator is always
920         // '/', whatever the platform.
921         InputStream is = (file != null)?
922             new FileInputStream(file):
923             Heritrix.class.getResourceAsStream("/" + PROPERTIES_KEY);
924         if (is == null) {
925             throw new IOException("Failed to load properties file from" +
926                 " filesystem or from classpath.");
927         }
928         return is;
929     }
930 
931     /***
932      * If the user hasn't altered the default logging parameters, tighten them
933      * up somewhat: some of our libraries are way too verbose at the INFO or
934      * WARNING levels.
935      * 
936      * This might be a problem running inside in someone else's
937      * container.  Container's seem to prefer commons logging so we
938      * ain't messing them doing the below.
939      *
940      * @throws IOException
941      * @throws SecurityException
942      */
943     protected static void patchLogging()
944     throws SecurityException, IOException {
945         if (System.getProperty("java.util.logging.config.class") != null) {
946             return;
947         }
948 
949         if (System.getProperty("java.util.logging.config.file") != null) {
950             return;
951         }
952 
953         // No user-set logging properties established; use defaults
954         // from distribution-packaged 'heritrix.properties'.
955         LogManager.getLogManager().
956             readConfiguration(getPropertiesInputStream());
957     }
958 
959     /***
960      * Configure our trust store.
961      *
962      * If system property is defined, then use it for our truststore.  Otherwise
963      * use the heritrix truststore under conf directory if it exists.
964      * 
965      * <p>If we're not launched from the command-line, we will not be able
966      * to find our truststore.  The truststore is nor normally used so rare
967      * should this be a problem (In case where we don't use find our trust
968      * store, we'll use the 'default' -- either the JVMs or the containers).
969      */
970     protected static void configureTrustStore() {
971         // Below must be defined in jsse somewhere but can' find it.
972         final String TRUSTSTORE_KEY = "javax.net.ssl.trustStore";
973         String value = System.getProperty(TRUSTSTORE_KEY);
974         File confdir = null;
975         try {
976             confdir = getConfdir(false);
977         } catch (IOException e) {
978             logger.log(Level.WARNING, "Failed to get confdir.", e);
979         }
980         if ((value == null || value.length() <= 0) && confdir != null) {
981             // Use the heritrix store if it exists on disk.
982             File heritrixStore = new File(confdir, "heritrix.cacerts");
983             if(heritrixStore.exists()) {
984                 value = heritrixStore.getAbsolutePath();
985             }
986         }
987 
988         if (value != null && value.length() > 0) {
989             System.setProperty(TRUSTSTORE_KEY, value);
990         }
991     }
992 
993     /***
994      * Run the selftest
995      *
996      * @param oneSelfTestName Name of a test if we are to run one only rather
997      * than the default running all tests.
998      * @param port Port number to use for web UI.
999      *
1000      * @exception Exception
1001      * @return Status of how selftest startup went.
1002      */
1003     protected static String selftest(final String oneSelfTestName,
1004             final int port)
1005         throws Exception {
1006         // Put up the webserver w/ the root and selftest webapps only.
1007         final String SELFTEST = "selftest";
1008         Heritrix.httpServer = new SimpleHttpServer(SELFTEST,
1009             Heritrix.adminContext, LOCALHOST_ONLY, port, true);
1010         // Set up digest auth for a section of the server so selftest can run
1011         // auth tests.  Looks like can only set one login realm going by the
1012         // web.xml dtd.  Otherwise, would be nice to selftest basic and digest.
1013         // Have login, password and role all be SELFTEST.  Must match what is
1014         // in the selftest order.xml file.
1015         Heritrix.httpServer.setAuthentication(SELFTEST, Heritrix.adminContext,
1016             SELFTEST, SELFTEST, SELFTEST);
1017         Heritrix.httpServer.startServer();
1018         // Get the order file from the CLASSPATH unless we're running in dev
1019         // environment.
1020         File selftestDir = (isDevelopment())?
1021             new File(getConfdir(), SELFTEST):
1022             new File(File.separator + SELFTEST);
1023         File crawlOrderFile = new File(selftestDir, "order.xml");
1024         // Create a job based off the selftest order file.  Then use this as
1025         // a template to pass jobHandler.newJob().  Doing this gets our
1026         // selftest output to show under the jobs directory.
1027         // Pass as a seed a pointer to the webserver we just put up.
1028         final String ROOTURI = "127.0.0.1:" + Integer.toString(port);
1029         String selfTestUrl = "http://" + ROOTURI + '/';
1030         if (oneSelfTestName != null && oneSelfTestName.length() > 0) {
1031             selfTestUrl += (oneSelfTestName + '/');
1032         }
1033         CrawlJobHandler cjh = new SelfTestCrawlJobHandler(getJobsdir(),
1034                 oneSelfTestName, selfTestUrl);
1035         Heritrix h = new Heritrix("Selftest", true, cjh);
1036         CrawlJob job = createCrawlJob(cjh, crawlOrderFile, "Template");
1037         job = h.getJobHandler().newJob(job, null, SELFTEST,
1038             "Integration self test", selfTestUrl, CrawlJob.PRIORITY_AVERAGE);
1039         h.getJobHandler().addJob(job);
1040         // Before we start, need to change some items in the settings file.
1041         CredentialStore cs = (CredentialStore)job.getSettingsHandler().
1042             getOrder().getAttribute(CredentialStore.ATTR_NAME);
1043         for (Iterator i = cs.iterator(null); i.hasNext();) {
1044             ((Credential)i.next()).setCredentialDomain(null, ROOTURI);
1045         }
1046         h.getJobHandler().startCrawler();
1047         StringBuffer buffer = new StringBuffer();
1048         buffer.append("Heritrix " + Heritrix.getVersion() +
1049                 " selftest started.");
1050         buffer.append("\nSelftest first crawls " + selfTestUrl +
1051             " and then runs an analysis.");
1052         buffer.append("\nResult of analysis printed to " +
1053             getHeritrixOut() + " when done.");
1054         buffer.append("\nSelftest job directory for logs and arcs:\n" +
1055             job.getDirectory().getAbsolutePath());
1056         return buffer.toString();
1057     }
1058 
1059     /***
1060      * Launch the crawler without a web UI and run the passed crawl only.
1061      * 
1062      * Specialized version of {@link #launch()}.
1063      *
1064      * @param crawlOrderFile The crawl order to crawl.
1065      * @throws InitializationException
1066      * @throws InvalidAttributeValueException
1067      * @return Status string.
1068      */
1069     protected String doOneCrawl(String crawlOrderFile)
1070     throws InitializationException, InvalidAttributeValueException {
1071         return doOneCrawl(crawlOrderFile, null);
1072     }
1073     
1074     /***
1075      * Launch the crawler without a web UI and run passed crawl only.
1076      * 
1077      * Specialized version of {@link #launch()}.
1078      *
1079      * @param crawlOrderFile The crawl order to crawl.
1080      * @param listener Register this crawl status listener before starting
1081      * crawl (You can use this listener to notice end-of-crawl).
1082      * @throws InitializationException
1083      * @throws InvalidAttributeValueException
1084      * @return Status string.
1085      */
1086     protected String doOneCrawl(String crawlOrderFile,
1087         CrawlStatusListener listener)
1088     throws InitializationException, InvalidAttributeValueException {
1089         XMLSettingsHandler handler =
1090             new XMLSettingsHandler(new File(crawlOrderFile));
1091         handler.initialize();
1092         CrawlController controller = new CrawlController();
1093         controller.initialize(handler);
1094         if (listener != null) {
1095             controller.addCrawlStatusListener(listener);
1096         }
1097         controller.requestCrawlStart();
1098         return "Crawl started using " + crawlOrderFile + ".";
1099     }
1100     
1101     /***
1102      * Launch the crawler for a web UI.
1103      *
1104      * Crawler hangs around waiting on jobs.
1105      *
1106      * @exception Exception
1107      * @return A status string describing how the launch went.
1108      * @throws Exception
1109      */
1110     public String launch() throws Exception {
1111         return launch(null, false);
1112     }
1113 
1114     /***
1115      * Launch the crawler for a web UI.
1116      *
1117      * Crawler hangs around waiting on jobs.
1118      * 
1119      * @param crawlOrderFile File to crawl.  May be null.
1120      * @param runMode Whether crawler should be set to run mode.
1121      *
1122      * @exception Exception
1123      * @return A status string describing how the launch went.
1124      */
1125     public String launch(String crawlOrderFile, boolean runMode)
1126     throws Exception {
1127         String status = null;
1128         if (crawlOrderFile != null) {
1129             addCrawlJob(crawlOrderFile, "Autolaunched", "", "");
1130             if(runMode) {
1131                 this.jobHandler.startCrawler();
1132                 status = "Job being crawled: " + crawlOrderFile;
1133             } else {
1134                 status = "Crawl job ready and pending: " + crawlOrderFile;
1135             }
1136         } else if(runMode) {
1137             // The use case is that jobs are to be run on a schedule and that
1138             // if the crawler is in run mode, then the scheduled job will be
1139             // run at appropriate time.  Otherwise, not.
1140             this.jobHandler.startCrawler();
1141             status = "Crawler set to run mode.";
1142         }
1143         return status;
1144     }
1145     
1146     /***
1147      * Start up the embedded Jetty webserver instance.
1148      * This is done when we're run from the command-line.
1149      * @param port Port number to use for web UI.
1150      * @param adminLoginPassword Compound of login and password.
1151      * @throws Exception
1152      * @return Status on webserver startup.
1153      * @deprecated  Use startEmbeddedWebserver(hosts, port, adminLoginPassword)
1154      */
1155     protected static String startEmbeddedWebserver(final int port,
1156         final boolean lho, final String adminLoginPassword)
1157     throws Exception {
1158         ArrayList<String> hosts = new ArrayList<String>();
1159         if (lho) {
1160             hosts.add("127.0.0.1");
1161         }
1162         return startEmbeddedWebserver(hosts, port, adminLoginPassword);
1163     }
1164 
1165     
1166     /***
1167      * Parses a list of host names.
1168      * 
1169      * <p>If the given string is <code>/</code>, then an empty
1170      * collection is returned.  This indicates that all available network
1171      * interfaces should be used.
1172      * 
1173      * <p>Otherwise, the string must contain a comma-separated list of 
1174      * IP addresses or host names.  The parsed list is then returned.
1175      * 
1176      * @param hosts  the string to parse
1177      * @return  the parsed collection of hosts 
1178      */
1179     private static Collection<String> parseHosts(String hosts) {
1180         hosts = hosts.trim();
1181         if (hosts.equals("/")) {
1182             return new ArrayList<String>(1);
1183         }
1184         String[] hostArray = hosts.split(",");
1185         for (int i = 0; i < hostArray.length; i++) {
1186             hostArray[i] = hostArray[i].trim();
1187         }
1188         return Arrays.asList(hostArray);
1189     }
1190     
1191     /***
1192      * Start up the embedded Jetty webserver instance.
1193      * This is done when we're run from the command-line.
1194      * 
1195      * @param hosts  a list of IP addresses or hostnames to bind to, or an
1196      *               empty collection to bind to all available network 
1197      *               interfaces
1198      * @param port Port number to use for web UI.
1199      * @param adminLoginPassword Compound of login and password.
1200      * @throws Exception
1201      * @return Status on webserver startup.
1202      */
1203     protected static String startEmbeddedWebserver(Collection<String> hosts, 
1204         int port, String adminLoginPassword) 
1205     throws Exception {
1206         adminUsername = adminLoginPassword.
1207             substring(0, adminLoginPassword.indexOf(":"));
1208         adminPassword = adminLoginPassword.
1209             substring(adminLoginPassword.indexOf(":") + 1);
1210         Heritrix.httpServer = new SimpleHttpServer("admin",
1211             Heritrix.adminContext, hosts, port, false);
1212         
1213         final String DOTWAR = ".war";
1214         final String SELFTEST = "selftest";
1215         
1216         // Look for additional WAR files beyond 'selftest' and 'admin'.
1217         File[] wars = getWarsdir().listFiles();
1218         for(int i = 0; i < wars.length; i++) {
1219             if(wars[i].isFile()) {
1220                 final String warName = wars[i].getName();
1221                 final String warNameNC = warName.toLowerCase();
1222                 if(warNameNC.endsWith(DOTWAR) &&
1223                         !warNameNC.equals(ADMIN + DOTWAR) &&
1224                         !warNameNC.equals(SELFTEST + DOTWAR)) {
1225                     int dot = warName.indexOf('.');
1226                     Heritrix.httpServer.addWebapp(warName.substring(0, dot),
1227                             null, true);
1228                 }
1229             }
1230         }
1231         
1232         // Name of passed 'realm' must match what is in configured in web.xml.
1233         // We'll use ROLE for 'realm' and 'role'.
1234         final String ROLE = ADMIN;
1235         Heritrix.httpServer.setAuthentication(ROLE, Heritrix.adminContext,
1236             adminUsername, adminPassword, ROLE);
1237         Heritrix.httpServer.startServer();
1238         StringBuffer buffer = new StringBuffer();
1239         buffer.append("Heritrix " + Heritrix.getVersion() + " is running.");
1240         for (String host: httpServer.getHosts()) {
1241             buffer.append("\nWeb console is at: http://");
1242             buffer.append(host).append(':').append(port);
1243         }
1244         buffer.append("\nWeb console login and password: " +
1245             adminUsername + "/" + adminPassword);
1246         return buffer.toString();
1247     }
1248     
1249     /***
1250      * Replace existing administrator login info with new info.
1251      * 
1252      * @param newUsername new administrator login username
1253      * @param newPassword new administrator login password
1254      */
1255     public static void resetAuthentication(String newUsername,
1256             String newPassword) {
1257         Heritrix.httpServer.resetAuthentication(ADMIN, adminUsername,
1258                 newUsername, newPassword);
1259         adminUsername = newUsername;
1260         adminPassword = newPassword; 
1261         logger.info("administrative login changed to "
1262                 +newUsername+":"+newPassword);
1263     }
1264 
1265     protected static CrawlJob createCrawlJob(CrawlJobHandler handler,
1266             File crawlOrderFile, String name)
1267     throws InvalidAttributeValueException {
1268         XMLSettingsHandler settings = new XMLSettingsHandler(crawlOrderFile);
1269         settings.initialize();
1270         return new CrawlJob(handler.getNextJobUID(), name, settings,
1271             new CrawlJobErrorHandler(Level.SEVERE),
1272             CrawlJob.PRIORITY_HIGH,
1273             crawlOrderFile.getAbsoluteFile().getParentFile());
1274     }
1275     
1276     /***
1277      * This method is called when we have an order file to hand that we want
1278      * to base a job on.  It leaves the order file in place and just starts up
1279      * a job that uses all the order points to for locations for logs, etc.
1280      * @param orderPathOrUrl Path to an order file or to a seeds file.
1281      * @param name Name to use for this job.
1282      * @param description 
1283      * @param seeds 
1284      * @return A status string.
1285      * @throws IOException 
1286      * @throws FatalConfigurationException 
1287      */
1288     public String addCrawlJob(String orderPathOrUrl, String name,
1289             String description, String seeds)
1290     throws IOException, FatalConfigurationException {
1291         if (!UURI.hasScheme(orderPathOrUrl)) {
1292             // Assume its a file path.
1293             return addCrawlJob(new File(orderPathOrUrl), name, description,
1294                     seeds);
1295         }
1296 
1297         // Otherwise, must be an URL.
1298         URL url = new URL(orderPathOrUrl);
1299 
1300         // Handle http and file only for now (Tried to handle JarUrlConnection
1301         // but too awkward undoing jar stream.  Rather just look for URLs that
1302         // end in '.jar').
1303         String result = null;
1304         URLConnection connection = url.openConnection();
1305         if (connection instanceof HttpURLConnection) {
1306             result = addCrawlJob(url, (HttpURLConnection)connection, name,
1307                 description, seeds);
1308         } else if (connection instanceof FileURLConnection) {
1309             result = addCrawlJob(new File(url.getPath()), name, description,
1310                 seeds);
1311         } else {
1312             throw new UnsupportedOperationException("No support for "
1313                 + connection);
1314         }
1315 
1316         return result;
1317     }
1318     
1319     protected String addCrawlJob(final URL url,
1320             final HttpURLConnection connection,
1321             final String name, final String description, final String seeds)
1322     throws IOException, FatalConfigurationException {
1323         connection.connect();
1324         // Look see if its a jar file.  If it is undo it.
1325         boolean isJar = url.getPath() != null
1326                 && url.getPath().toLowerCase().endsWith(JAR_SUFFIX)
1327                 || "application/java-archive".equals(connection
1328                         .getHeaderField("Content-Type"));
1329         // If http url connection, bring down the resource local.
1330         File localFile = File.createTempFile(Heritrix.class.getName(),
1331            isJar? JAR_SUFFIX: null, TMPDIR);
1332         String result = null;
1333         try {
1334             IoUtils.readFullyToFile(connection.getInputStream(), localFile);
1335             result = addCrawlJob(localFile, name, description, seeds);
1336         } catch (IOException ioe) {
1337             // Cleanup if an Exception.
1338             localFile.delete();
1339             localFile = null;
1340         } finally {
1341              connection.disconnect();
1342              // If its a jar file, then we made a job based on the jar contents.
1343              // Its no longer needed.  Remove it.  If not a jar file, then leave
1344              // the file around because the job depends on it.
1345              if (isJar && localFile != null && localFile.exists()) {
1346                  localFile.delete();
1347              }
1348         }
1349         return result;
1350     }
1351     
1352     protected String addCrawlJob(final File order, final String name,
1353             final String description, final String seeds)
1354     throws FatalConfigurationException, IOException {
1355         CrawlJob addedJob = null;
1356         if (this.jobHandler == null) {
1357             throw new NullPointerException("Heritrix jobhandler is null.");
1358         }
1359         try {
1360             if (order.getName().toLowerCase().endsWith(JAR_SUFFIX)) {
1361                 return addCrawlJobBasedonJar(order, name, description, seeds);
1362             }
1363             addedJob = this.jobHandler.
1364                 addJob(createCrawlJob(this.jobHandler, order, name));
1365         } catch (InvalidAttributeValueException e) {
1366             FatalConfigurationException fce = new FatalConfigurationException(
1367                 "Converted InvalidAttributeValueException on " +
1368                 order.getAbsolutePath() + ": " + e.getMessage());
1369             fce.setStackTrace(e.getStackTrace());
1370         }
1371         return addedJob != null? addedJob.getUID(): null;
1372     }
1373     
1374     /***
1375      * Undo jar file and use as basis for a new job.
1376      * @param jarFile Pointer to file that holds jar.
1377      * @param name Name to use for new job.
1378      * @param description 
1379      * @param seeds 
1380      * @return Message.
1381      * @throws IOException
1382      * @throws FatalConfigurationException
1383      */
1384     protected String addCrawlJobBasedonJar(final File jarFile,
1385             final String name, final String description, final String seeds)
1386     throws IOException, FatalConfigurationException {
1387         if (jarFile == null || !jarFile.exists()) {
1388             throw new FileNotFoundException(jarFile.getAbsolutePath());
1389         }
1390         // Create a directory with a tmp name.  Do it by first creating file,
1391         // removing it, then creating the directory. There is a hole during
1392         // which the OS may put a file of same exact name in our way but
1393         // unlikely.
1394         File dir = File.createTempFile(Heritrix.class.getName(), ".expandedjar",
1395             TMPDIR);
1396         dir.delete();
1397         dir.mkdir();
1398         try {
1399             org.archive.crawler.util.IoUtils.unzip(jarFile, dir);
1400             // Expect to find an order file at least.
1401             File orderFile = new File(dir, "order.xml");
1402             if (!orderFile.exists()) {
1403                 throw new IOException("Missing order: " +
1404                     orderFile.getAbsolutePath());
1405             }
1406             CrawlJob job =
1407                 createCrawlJobBasedOn(orderFile, name, description, seeds);
1408 
1409             // Copy contents of jar into place (excluding order.xml and settings which are handled in createCrawlJobBasedOn)
1410             FileUtils.copyFiles(dir,
1411                     new FilenameFilter() {
1412                         public boolean accept(File dir, String name) {
1413                             return !name.equals("order.xml") && !name.equals("settings");
1414                         }
1415                     }, 
1416                     job.getDirectory(), false, true);
1417             addCrawlJob(job);
1418             return job.getUID();
1419         } catch (RuntimeException e) {
1420             logger.severe("problem adding crawl job from order jar " + jarFile + ": " + e);
1421             throw new FatalConfigurationException(e.toString());
1422          } finally {
1423              // After job has been added, no more need of expanded content.
1424              // (Let the caller be responsible for cleanup of jar. Sometimes
1425              // its should be deleted -- when its a local copy of a jar pulled
1426              // across the net -- wherease other times, if its a jar passed
1427              // in w/ a 'file' scheme, it shouldn't be deleted.
1428              org.archive.util.FileUtils.deleteDir(dir);
1429          }
1430     }
1431     
1432     public String addCrawlJobBasedOn(String jobUidOrProfile,
1433             String name, String description, String seeds) {
1434         try {
1435             CrawlJob cj = getJobHandler().getJob(jobUidOrProfile);
1436             if (cj == null) {
1437                 throw new InvalidAttributeValueException(jobUidOrProfile +
1438                     " is not a job UID or profile name (Job UIDs are " +
1439                     " usually the 14 digit date portion of job name).");
1440             }
1441             CrawlJob job = addCrawlJobBasedOn(
1442                 cj.getSettingsHandler().getOrderFile(), name, description,
1443                     seeds);
1444             return job.getUID();
1445         } catch (Exception e) {
1446             e.printStackTrace();
1447             return "Exception on " + jobUidOrProfile + ": " + e.getMessage();
1448         } 
1449     }
1450     
1451     protected CrawlJob addCrawlJobBasedOn(final File orderFile,
1452         final String name, final String description, final String seeds)
1453     throws FatalConfigurationException {
1454         return addCrawlJob(createCrawlJobBasedOn(orderFile, name, description,
1455                 seeds));
1456     }
1457     
1458     protected CrawlJob createCrawlJobBasedOn(final File orderFile,
1459             final String name, final String description, final String seeds)
1460     throws FatalConfigurationException {
1461         CrawlJob job = getJobHandler().newJob(orderFile, name, description,
1462                 seeds);
1463         return CrawlJobHandler.ensureNewJobWritten(job, name, description);
1464     }
1465     
1466     protected CrawlJob addCrawlJob(final CrawlJob job) {
1467         return getJobHandler().addJob(job);
1468     }
1469     
1470     public void startCrawling() {
1471         if (getJobHandler() == null) {
1472             throw new NullPointerException("Heritrix jobhandler is null.");
1473         }
1474         getJobHandler().startCrawler();
1475     }
1476 
1477     public void stopCrawling() {
1478         if (getJobHandler() == null) {
1479             throw new NullPointerException("Heritrix jobhandler is null.");
1480         }
1481         getJobHandler().stopCrawler();
1482     }
1483     
1484     /***
1485      * Get the heritrix version.
1486      *
1487      * @return The heritrix version.  May be null.
1488      */
1489     public static String getVersion() {
1490         return System.getProperty("heritrix.version");
1491     }
1492 
1493     /***
1494      * Get the job handler
1495      *
1496      * @return The CrawlJobHandler being used.
1497      */
1498     public CrawlJobHandler getJobHandler() {
1499         return this.jobHandler;
1500     }
1501 
1502     /***
1503      * Get the configuration directory.
1504      * @return The conf directory under HERITRIX_HOME or null if none can
1505      * be found.
1506      * @throws IOException
1507      */
1508     public static File getConfdir()
1509     throws IOException {
1510         return getConfdir(true);
1511     }
1512 
1513     /***
1514      * Get the configuration directory.
1515      * @param fail Throw IOE if can't find directory if true, else just
1516      * return null.
1517      * @return The conf directory under HERITRIX_HOME or null (or an IOE) if
1518      * can't be found.
1519      * @throws IOException
1520      */
1521     public static File getConfdir(final boolean fail)
1522     throws IOException {
1523         final String key = "heritrix.conf";
1524         // Look to see if heritrix.conf property passed on the cmd-line.
1525         String tmp = System.getProperty(key);
1526         // if not fall back to default $HERITIX_HOME/conf
1527         if (tmp == null || tmp.length() == 0) {
1528             return getSubDir("conf", fail);
1529         }
1530         File dir = new File(tmp);
1531         if (!dir.exists()) {
1532             if (fail) {
1533                 throw new IOException("Cannot find conf dir: " + tmp);
1534             } else {
1535                 logger.log(Level.WARNING, "Specified " + key +
1536                     " dir does not exist.  Falling back on default");
1537             }
1538             dir = getSubDir("conf", fail);
1539         }
1540         return dir;
1541     }
1542 
1543     /***
1544      * @return Returns the httpServer. May be null if one was not started.
1545      */
1546     public static SimpleHttpServer getHttpServer() {
1547         return Heritrix.httpServer;
1548     }
1549 
1550     /***
1551      * @throws IOException
1552      * @return Returns the directory under which reside the WAR files
1553      * we're to load into the servlet container.
1554      */
1555     public static File getWarsdir()
1556     throws IOException {
1557         return getSubDir("webapps");
1558     }
1559 
1560     /***
1561      * Prepars for program shutdown. This method does it's best to prepare the
1562      * program so that it can exit normally. It will kill the httpServer and
1563      * terminate any running job.<br>
1564      * It is advisible to wait a few (~1000) millisec after calling this method
1565      * and before calling performHeritrixShutDown() to allow as many threads as
1566      * possible to finish what they are doing.
1567      */
1568     public static void prepareHeritrixShutDown() {
1569         // Stop and destroy all running Heritrix instances.
1570         // Get array of the key set to avoid CCEs for case where call to
1571         // destroy does a remove of an instance from Heritrix.instances.
1572         final Object [] keys = Heritrix.instances.keySet().toArray();
1573         for (int i = 0; i < keys.length; i++) {
1574             ((Heritrix)Heritrix.instances.get(keys[i])).destroy();
1575         }
1576         
1577         try {
1578             deregisterJndi(getJndiContainerName());
1579         } catch (NameNotFoundException e) {
1580             // We were probably unbound already. Ignore.
1581             logger.log(Level.WARNING, "deregistration of jndi", e);
1582         } catch (Exception e) {
1583             e.printStackTrace();
1584         }
1585         
1586         if(Heritrix.httpServer != null) {
1587             // Shut down the web access.
1588             try {
1589                 Heritrix.httpServer.stopServer();
1590             } catch (InterruptedException e) {
1591                 // Generally this can be ignored, but we'll print a stack trace
1592                 // just in case.
1593                 e.printStackTrace();
1594             } finally {
1595                 Heritrix.httpServer = null;
1596             }
1597         }
1598     }
1599 
1600     /***
1601      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1602      * prior to this method.
1603      */
1604     public static void performHeritrixShutDown() {
1605         performHeritrixShutDown(0);
1606     }
1607 
1608     /***
1609      * Exit program. Recommended that prepareHeritrixShutDown() be invoked
1610      * prior to this method.
1611      *
1612      * @param exitCode Code to pass System.exit.
1613      *
1614      */
1615     public static void performHeritrixShutDown(int exitCode) {
1616         System.exit(exitCode);
1617     }
1618 
1619     /***
1620      * Shutdown all running heritrix instances and the JVM.
1621      * Assumes stop has already been called.
1622 	 * @param exitCode Exit code to pass system exit.
1623 	 */
1624 	public static void shutdown(final int exitCode) {
1625         getShutdownThread(true, exitCode, "Heritrix shutdown").start();
1626 	}
1627     
1628     protected static Thread getShutdownThread(final boolean sysexit,
1629             final int exitCode, final String name) {
1630         Thread t = new Thread(name) {
1631             public void run() {
1632                 Heritrix.prepareHeritrixShutDown();
1633                 if (sysexit) {
1634                     Heritrix.performHeritrixShutDown(exitCode);
1635                 }
1636             }
1637         };
1638         t.setDaemon(true);
1639         return t;
1640     }
1641     
1642     public static void shutdown() {
1643         shutdown(0);
1644     }
1645     
1646     /***
1647      * Register Heritrix with JNDI, JMX, and with the static hashtable of all
1648      * Heritrix instances known to this JVM.
1649      * 
1650      * If launched from cmdline, register Heritrix MBean if an agent to register
1651      * ourselves with. Usually this method will only have effect if we're
1652      * running in a 1.5.0 JDK and command line options such as
1653      * '-Dcom.sun.management.jmxremote.port=8082
1654      * -Dcom.sun.management.jmxremote.authenticate=false
1655      * -Dcom.sun.management.jmxremote.ssl=false' are supplied.
1656      * See <a href="http://java.sun.com/j2se/1.5.0/docs/guide/management/agent.html">Monitoring
1657      * and Management Using JMX</a>
1658      * for more on the command line options and how to connect to the
1659      * Heritrix bean using the JDK 1.5.0 jconsole tool.  We register currently
1660      * with first server we find (TODO: Make configurable).
1661      * 
1662      * <p>If we register successfully with a JMX agent, then part of the
1663      * registration will include our registering ourselves with JNDI.
1664      * 
1665      * <p>Finally, add the heritrix instance to the hashtable of all the
1666      * Heritrix instances floating in the current VM.  This latter registeration
1667      * happens whether or no there is a JMX agent to register with.  This is
1668      * a list we keep out of convenience so its easy iterating over all
1669      *  all instances calling stop when main application is going down.
1670      * 
1671      * @param h Instance of heritrix to register.
1672      * @param name Name to use for this Heritrix instance.
1673      * @param jmxregister True if we are to register this instance with JMX.
1674      * @throws NullPointerException
1675      * @throws MalformedObjectNameException
1676      * @throws NotCompliantMBeanException 
1677      * @throws MBeanRegistrationException 
1678      * @throws InstanceAlreadyExistsException 
1679      */
1680     protected static void registerHeritrix(final Heritrix h,
1681             final String name, final boolean jmxregister)
1682     throws MalformedObjectNameException, InstanceAlreadyExistsException,
1683     MBeanRegistrationException, NotCompliantMBeanException {
1684         MBeanServer server = getMBeanServer();
1685         if (server != null) {
1686             // Are we to manage the jmx registration?  Or is it being done for
1687             // us by an external process: e.g. This instance was created by
1688             // MBeanAgent.
1689             if (jmxregister) {
1690                 ObjectName objName = (name == null || name.length() <= 0)?
1691                     getJmxObjectName(): getJmxObjectName(name);
1692                 registerMBean(server, h, objName);
1693             }
1694         } else {
1695             // JMX ain't available. Put this instance into the list of Heritrix
1696             // instances so findable by the UI (Normally this is done in the
1697             // JMX postRegister routine below).  When no JMX, can only have
1698             // one instance of Heritrix so no need to do the deregisteration.
1699             Heritrix.instances.put(h.getNoJmxName(), h);
1700         }
1701     }
1702     
1703     protected static void unregisterHeritrix(final Heritrix h)
1704     throws InstanceNotFoundException, MBeanRegistrationException,
1705             NullPointerException {
1706         MBeanServer server = getMBeanServer();
1707         if (server != null) {
1708             server.unregisterMBean(h.mbeanName);
1709         } else {
1710             // JMX ain't available. Remove from list of Heritrix instances.
1711             // Usually this is done by the JMX postDeregister below.
1712             Heritrix.instances.remove(h.getNoJmxName());
1713         }
1714     }
1715     
1716     /***
1717      * Get MBeanServer.
1718      * Currently uses first MBeanServer found.  This will definetly not be whats
1719      * always wanted. TODO: Make which server settable. Also, if none, put up
1720      * our own MBeanServer.
1721      * @return An MBeanServer to register with or null.
1722      */
1723     public static MBeanServer getMBeanServer() {
1724         MBeanServer result = null;
1725         List servers = MBeanServerFactory.findMBeanServer(null);
1726         if (servers == null) {
1727             return result;
1728         }
1729         for (Iterator i = servers.iterator(); i.hasNext();) {
1730             MBeanServer server = (MBeanServer)i.next();
1731             if (server == null) {
1732                 continue;
1733             }
1734             result = server;
1735             break;
1736         }
1737         return result;
1738     }
1739     
1740     public static MBeanServer registerMBean(final Object objToRegister,
1741             final String name, final String type)
1742     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1743     NotCompliantMBeanException {
1744         MBeanServer server = getMBeanServer();
1745         if (server != null) {
1746             server = registerMBean(server, objToRegister, name, type);
1747         }
1748         return server;
1749     }
1750     
1751     public static MBeanServer registerMBean(final MBeanServer server,
1752             final Object objToRegister, final String name, final String type)
1753     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1754     NotCompliantMBeanException {
1755         try {
1756             Hashtable<String,String> ht = new Hashtable<String,String>();
1757             ht.put(JmxUtils.NAME, name);
1758             ht.put(JmxUtils.TYPE, type);
1759             registerMBean(server, objToRegister,
1760                 new ObjectName(CRAWLER_PACKAGE, ht));
1761         } catch (MalformedObjectNameException e) {
1762             e.printStackTrace();
1763         }
1764         return server;
1765     }
1766         
1767     public static MBeanServer registerMBean(final MBeanServer server,
1768                 final Object objToRegister, final ObjectName objName)
1769     throws InstanceAlreadyExistsException, MBeanRegistrationException,
1770     NotCompliantMBeanException {
1771         server.registerMBean(objToRegister, objName);
1772         return server;
1773     }
1774     
1775     public static void unregisterMBean(final MBeanServer server,
1776             final String name, final String type) {
1777         if (server == null) {
1778             return;
1779         }
1780         try {
1781             unregisterMBean(server, getJmxObjectName(name, type));
1782         } catch (MalformedObjectNameException e) {
1783             e.printStackTrace();
1784         }
1785     }
1786             
1787     public static void unregisterMBean(final MBeanServer server,
1788             final ObjectName name) {
1789         try {
1790             server.unregisterMBean(name);
1791             logger.info("Unregistered bean " + name.getCanonicalName());
1792         } catch (InstanceNotFoundException e) {
1793             e.printStackTrace();
1794         } catch (MBeanRegistrationException e) {
1795             e.printStackTrace();
1796         } catch (NullPointerException e) {
1797             e.printStackTrace();
1798         }
1799     }
1800     
1801     /***
1802      * @return Name to use when no JMX agent available.
1803      */
1804     protected String getNoJmxName() {
1805         return this.getClass().getName();
1806     }
1807     
1808     public static ObjectName getJmxObjectName()
1809     throws MalformedObjectNameException, NullPointerException {
1810         return getJmxObjectName("Heritrix", JmxUtils.SERVICE);
1811     }
1812     
1813     public static ObjectName getJmxObjectName(final String name)
1814     throws MalformedObjectNameException, NullPointerException {
1815         return getJmxObjectName(name, JmxUtils.SERVICE);
1816     }
1817     
1818     public static ObjectName getJmxObjectName(final String name,
1819             final String type)
1820     throws MalformedObjectNameException, NullPointerException {
1821         Hashtable<String,String> ht = new Hashtable<String,String>();
1822         ht.put(JmxUtils.NAME, name);
1823         ht.put(JmxUtils.TYPE, type);
1824         return new ObjectName(CRAWLER_PACKAGE, ht);
1825     }
1826     
1827     /***
1828      * @return Returns true if Heritrix was launched from the command line.
1829      * (When launched from command line, we do stuff like put up a web server
1830      * to manage our web interface and we register ourselves with the first
1831      * available jmx agent).
1832      */
1833     public static boolean isCommandLine() {
1834         return Heritrix.commandLine;
1835     }
1836     
1837     /***
1838      * @return True if heritrix has been started.
1839      */
1840     public boolean isStarted() {
1841         return this.jobHandler != null;
1842     }
1843     
1844     public String getStatus() {
1845         StringBuffer buffer = new StringBuffer();
1846         if (this.getJobHandler() != null) {
1847             buffer.append("isRunning=");
1848             buffer.append(this.getJobHandler().isRunning());
1849             buffer.append(" isCrawling=");
1850             buffer.append(this.getJobHandler().isCrawling());
1851             buffer.append(" alertCount=");
1852             buffer.append(getAlertsCount());
1853             buffer.append(" newAlertCount=");
1854             buffer.append(getNewAlertsCount());
1855             if (this.getJobHandler().isCrawling()) {
1856                 buffer.append(" currentJob=");
1857                 buffer.append(this.getJobHandler().getCurrentJob().
1858                     getJmxJobName());
1859             }
1860         }
1861         return buffer.toString();
1862     }
1863     
1864     // Alert methods.
1865     public int getAlertsCount() {
1866         return this.alertManager.getCount();
1867     }
1868     
1869     public int getNewAlertsCount() {
1870         return this.alertManager.getNewCount();
1871     }
1872     
1873     public Vector getAlerts() {
1874         return this.alertManager.getAll();
1875     }
1876     
1877     public Vector getNewAlerts() {
1878         return this.alertManager.getNewAll();
1879     }
1880     
1881     public SinkHandlerLogRecord getAlert(final String id) {
1882         return this.alertManager.get(id);
1883     }
1884     
1885     public void readAlert(final String id) {
1886         this.alertManager.read(id);
1887     }
1888     
1889     public void removeAlert(final String id) {
1890         this.alertManager.remove(id);
1891     }
1892     
1893     /***
1894      * Start Heritrix.
1895      * 
1896      * Used by JMX and webapp initialization for starting Heritrix.
1897      * Not by the cmdline launched Heritrix. Idempotent.
1898      * If start is called by JMX, then new instance of Heritrix is automatically
1899      * registered w/ JMX Agent.  If started by webapp, need to register the new
1900      * Heritrix instance.
1901      */
1902     public void start() {
1903         // Don't start if we've been launched from the command line.
1904         // Don't start if already started.
1905         if (!Heritrix.isCommandLine() && !isStarted()) {
1906             try {
1907                 logger.info(launch());
1908             } catch (Exception e) {
1909                 e.printStackTrace();
1910             }
1911         }
1912     }
1913     
1914     /***
1915      * Stop Heritrix.
1916      * 
1917      * Used by JMX and webapp initialization for stopping Heritrix.
1918      */
1919     public void stop() {
1920         if (this.jobHandler != null) {
1921             this.jobHandler.stop();
1922         }
1923     }
1924 
1925     public String interrupt(String threadName) {
1926         String result = "Thread " + threadName + " not found";
1927         ThreadGroup group = Thread.currentThread().getThreadGroup();
1928         if (group == null) {
1929             return result;
1930         }
1931         // Back up to the root threadgroup before starting
1932         // to iterate over threads.
1933         ThreadGroup parent = null;
1934         while((parent = group.getParent()) != null) {
1935             group = parent;
1936         }
1937         // Do an array that is twice the size of active
1938         // thread count.  That should be big enough.
1939         final int max = group.activeCount() * 2;
1940         Thread [] threads = new Thread[max];
1941         int threadCount = group.enumerate(threads, true);
1942         if (threadCount >= max) {
1943             logger.info("Some threads not found...array too small: " +
1944                 max);
1945         }
1946         for (int j = 0; j < threadCount; j++) {
1947             if (threads[j].getName().equals(threadName)) {
1948                 threads[j].interrupt();
1949                 result = "Interrupt sent to " + threadName;
1950                 break;
1951             }
1952         }
1953         return result;
1954     }
1955 
1956     // OpenMBean implementation.
1957     
1958     /***
1959      * Build up the MBean info for Heritrix main.
1960      * @return Return created mbean info instance.
1961      */
1962     protected OpenMBeanInfoSupport buildMBeanInfo() {
1963         OpenMBeanAttributeInfoSupport[] attributes =
1964             new OpenMBeanAttributeInfoSupport[Heritrix.ATTRIBUTE_LIST.size()];
1965         OpenMBeanConstructorInfoSupport[] constructors =
1966             new OpenMBeanConstructorInfoSupport[1];
1967         OpenMBeanOperationInfoSupport[] operations =
1968             new OpenMBeanOperationInfoSupport[Heritrix.OPERATION_LIST.size()];
1969         MBeanNotificationInfo[] notifications =
1970             new MBeanNotificationInfo[0];
1971 
1972         // Attributes.
1973         attributes[0] =
1974             new OpenMBeanAttributeInfoSupport(Heritrix.STATUS_ATTR,
1975                 "Short basic status message", SimpleType.STRING, true,
1976                 false, false);
1977         // Attributes.
1978         attributes[1] =
1979             new OpenMBeanAttributeInfoSupport(Heritrix.VERSION_ATTR,
1980                 "Heritrix version", SimpleType.STRING, true, false, false);
1981         // Attributes.
1982         attributes[2] =
1983             new OpenMBeanAttributeInfoSupport(Heritrix.ISRUNNING_ATTR,
1984                 "Whether the crawler is running", SimpleType.BOOLEAN, true,
1985                 false, false);
1986         // Attributes.
1987         attributes[3] =
1988             new OpenMBeanAttributeInfoSupport(Heritrix.ISCRAWLING_ATTR,
1989                 "Whether the crawler is crawling", SimpleType.BOOLEAN, true,
1990                 false, false);
1991         // Attributes.
1992         attributes[4] =
1993             new OpenMBeanAttributeInfoSupport(Heritrix.ALERTCOUNT_ATTR,
1994                 "The number of alerts", SimpleType.INTEGER, true, false, false);
1995         // Attributes.
1996         attributes[5] =
1997             new OpenMBeanAttributeInfoSupport(Heritrix.NEWALERTCOUNT_ATTR,
1998                 "The number of new alerts", SimpleType.INTEGER, true, false,
1999                 false);
2000         // Attributes.
2001         attributes[6] =
2002             new OpenMBeanAttributeInfoSupport(Heritrix.CURRENTJOB_ATTR,
2003                 "The name of the job currently being crawled", 
2004                 SimpleType.STRING, true, false, false);
2005 
2006         // Constructors.
2007         constructors[0] = new OpenMBeanConstructorInfoSupport(
2008             "HeritrixOpenMBean", "Constructs Heritrix OpenMBean instance ",
2009             new OpenMBeanParameterInfoSupport[0]);
2010 
2011         // Operations.
2012         operations[0] = new OpenMBeanOperationInfoSupport(
2013             Heritrix.START_OPER, "Start Heritrix instance", null,
2014                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2015         
2016         operations[1] = new OpenMBeanOperationInfoSupport(
2017             Heritrix.STOP_OPER, "Stop Heritrix instance", null,
2018                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2019         
2020         OpenMBeanParameterInfo[] args = new OpenMBeanParameterInfoSupport[1];
2021         args[0] = new OpenMBeanParameterInfoSupport("threadName",
2022             "Name of thread to send interrupt", SimpleType.STRING);
2023         operations[2] = new OpenMBeanOperationInfoSupport(
2024             Heritrix.INTERRUPT_OPER, "Send thread an interrupt " +
2025                 "(Used debugging)", args, SimpleType.STRING,
2026                 MBeanOperationInfo.ACTION_INFO);
2027         
2028         operations[3] = new OpenMBeanOperationInfoSupport(
2029             Heritrix.START_CRAWLING_OPER, "Set Heritrix instance " +
2030                 "into crawling mode", null, SimpleType.VOID,
2031                 MBeanOperationInfo.ACTION);
2032         
2033         operations[4] = new OpenMBeanOperationInfoSupport(
2034             Heritrix.STOP_CRAWLING_OPER, "Unset Heritrix instance " +
2035                 " crawling mode", null, SimpleType.VOID,
2036                 MBeanOperationInfo.ACTION);
2037         
2038         args = new OpenMBeanParameterInfoSupport[4];
2039         args[0] = new OpenMBeanParameterInfoSupport("pathOrURL",
2040             "Path/URL to order or jar of order+seed",
2041             SimpleType.STRING);
2042         args[1] = new OpenMBeanParameterInfoSupport("name",
2043             "Basename for new job", SimpleType.STRING);
2044         args[2] = new OpenMBeanParameterInfoSupport("description",
2045             "Description to save with new job", SimpleType.STRING);
2046         args[3] = new OpenMBeanParameterInfoSupport("seeds",
2047             "Initial seed(s)", SimpleType.STRING);
2048         operations[5] = new OpenMBeanOperationInfoSupport(
2049             Heritrix.ADD_CRAWL_JOB_OPER, "Add new crawl job", args,
2050                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2051         
2052         args = new OpenMBeanParameterInfoSupport[4];
2053         args[0] = new OpenMBeanParameterInfoSupport("uidOrName",
2054             "Job UID or profile name", SimpleType.STRING);
2055         args[1] = new OpenMBeanParameterInfoSupport("name",
2056             "Basename for new job", SimpleType.STRING);
2057         args[2] = new OpenMBeanParameterInfoSupport("description",
2058             "Description to save with new job", SimpleType.STRING);
2059         args[3] = new OpenMBeanParameterInfoSupport("seeds",
2060             "Initial seed(s)", SimpleType.STRING);
2061         operations[6] = new OpenMBeanOperationInfoSupport(
2062             Heritrix.ADD_CRAWL_JOB_BASEDON_OPER,
2063             "Add a new crawl job based on passed Job UID or profile",
2064             args, SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2065         
2066         args = new OpenMBeanParameterInfoSupport[1];
2067         args[0] = new OpenMBeanParameterInfoSupport("UID",
2068             "Job UID", SimpleType.STRING);
2069         operations[7] = new OpenMBeanOperationInfoSupport(DELETE_CRAWL_JOB_OPER,
2070             "Delete/stop this crawl job", args, SimpleType.VOID,
2071             MBeanOperationInfo.ACTION);
2072         
2073         args = new OpenMBeanParameterInfoSupport[1];
2074         args[0] = new OpenMBeanParameterInfoSupport("index",
2075             "Zero-based index into array of alerts", SimpleType.INTEGER);
2076         operations[8] = new OpenMBeanOperationInfoSupport(
2077             Heritrix.ALERT_OPER, "Return alert at passed index", args,
2078                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2079         
2080         try {
2081             this.jobCompositeType = new CompositeType("job",
2082                     "Job attributes", JOB_KEYS,
2083                     new String [] {"Job unique ID", "Job name", "Job status"},
2084                     new OpenType [] {SimpleType.STRING, SimpleType.STRING,
2085                         SimpleType.STRING});
2086             this.jobsTabularType = new TabularType("jobs", "List of jobs",
2087                     this.jobCompositeType, new String [] {"uid"});
2088         } catch (OpenDataException e) {
2089             // This should never happen.
2090             throw new RuntimeException(e);
2091         }
2092         operations[9] = new OpenMBeanOperationInfoSupport(
2093             Heritrix.PENDING_JOBS_OPER,
2094                 "List of pending jobs (or null if none)", null,
2095                 this.jobsTabularType, MBeanOperationInfo.INFO);
2096         operations[10] = new OpenMBeanOperationInfoSupport(
2097                 Heritrix.COMPLETED_JOBS_OPER,
2098                     "List of completed jobs (or null if none)", null,
2099                     this.jobsTabularType, MBeanOperationInfo.INFO);
2100         
2101         args = new OpenMBeanParameterInfoSupport[2];
2102         args[0] = new OpenMBeanParameterInfoSupport("uid",
2103             "Job unique ID", SimpleType.STRING);
2104         args[1] = new OpenMBeanParameterInfoSupport("name",
2105                 "Report name (e.g. crawl-report, etc.)",
2106                 SimpleType.STRING);
2107         operations[11] = new OpenMBeanOperationInfoSupport(
2108             Heritrix.CRAWLEND_REPORT_OPER, "Return crawl-end report", args,
2109                 SimpleType.STRING, MBeanOperationInfo.ACTION_INFO);
2110         
2111         operations[12] = new OpenMBeanOperationInfoSupport(
2112             Heritrix.SHUTDOWN_OPER, "Shutdown container", null,
2113                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2114         
2115         args = new OpenMBeanParameterInfoSupport[2];
2116         args[0] = new OpenMBeanParameterInfoSupport("level",
2117             "Log level: e.g. SEVERE, WARNING, etc.", SimpleType.STRING);
2118         args[1] = new OpenMBeanParameterInfoSupport("message",
2119             "Log message", SimpleType.STRING);
2120         operations[13] = new OpenMBeanOperationInfoSupport(Heritrix.LOG_OPER,
2121             "Add a log message", args, SimpleType.VOID,
2122             MBeanOperationInfo.ACTION);
2123         
2124         operations[14] = new OpenMBeanOperationInfoSupport(
2125             Heritrix.DESTROY_OPER, "Destroy Heritrix instance", null,
2126                 SimpleType.VOID, MBeanOperationInfo.ACTION);
2127         
2128         operations[15] = new OpenMBeanOperationInfoSupport(
2129             Heritrix.TERMINATE_CRAWL_JOB_OPER,
2130             "Returns false if no current job", null, SimpleType.BOOLEAN,
2131             MBeanOperationInfo.ACTION);
2132         
2133         operations[16] = new OpenMBeanOperationInfoSupport(
2134             Heritrix.REBIND_JNDI_OPER,
2135             "Rebinds this Heritrix with JNDI.", null,
2136             SimpleType.VOID, MBeanOperationInfo.ACTION);
2137 
2138         // Build the info object.
2139         return new OpenMBeanInfoSupport(this.getClass().getName(),
2140             "Heritrix Main OpenMBean", attributes, constructors, operations,
2141             notifications);
2142     }
2143     
2144     public Object getAttribute(String attribute_name)
2145     throws AttributeNotFoundException {
2146         if (attribute_name == null) {
2147             throw new RuntimeOperationsException(
2148                  new IllegalArgumentException("Attribute name cannot be null"),
2149                  "Cannot call getAttribute with null attribute name");
2150         }
2151         if (!Heritrix.ATTRIBUTE_LIST.contains(attribute_name)) {
2152             throw new AttributeNotFoundException("Attribute " +
2153                  attribute_name + " is unimplemented.");
2154         }
2155         // The pattern in the below is to match an attribute and when found
2156         // do a return out of if clause.  Doing it this way, I can fall
2157         // on to the AttributeNotFoundException for case where we've an
2158         // attribute but no handler.
2159         if (attribute_name.equals(STATUS_ATTR)) {
2160             return getStatus();
2161         }
2162         if (attribute_name.equals(VERSION_ATTR)) {
2163             return getVersion();
2164         }
2165 
2166         if (attribute_name.equals(ISRUNNING_ATTR)) {
2167             return new Boolean(this.getJobHandler().isRunning());
2168         }
2169         if (attribute_name.equals(ISCRAWLING_ATTR)) {
2170             return new Boolean(this.getJobHandler().isCrawling());
2171         }
2172         if (attribute_name.equals(ALERTCOUNT_ATTR)) {
2173             return new Integer(getAlertsCount());
2174         }
2175         if (attribute_name.equals(NEWALERTCOUNT_ATTR)) {
2176             return new Integer(getNewAlertsCount());
2177         }
2178         if (attribute_name.equals(CURRENTJOB_ATTR)) {
2179             if (this.getJobHandler().isCrawling()) {
2180                 return this.getJobHandler().getCurrentJob().getJmxJobName();
2181             }
2182             return null;
2183         }
2184         throw new AttributeNotFoundException("Attribute " +
2185             attribute_name + " not found.");
2186     }
2187 
2188     public void setAttribute(Attribute attribute)
2189     throws AttributeNotFoundException {
2190         throw new AttributeNotFoundException("No attribute can be set in " +
2191             "this MBean");
2192     }
2193 
2194     public AttributeList getAttributes(String [] attributeNames) {
2195         if (attributeNames == null) {
2196             throw new RuntimeOperationsException(
2197                 new IllegalArgumentException("attributeNames[] cannot be " +
2198                 "null"), "Cannot call getAttributes with null attribute " +
2199                 "names");
2200         }
2201         AttributeList resultList = new AttributeList();
2202         if (attributeNames.length == 0) {
2203             return resultList;
2204         }
2205         for (int i = 0; i < attributeNames.length; i++) {
2206             try {
2207                 Object value = getAttribute(attributeNames[i]);
2208                 resultList.add(new Attribute(attributeNames[i], value));
2209             } catch (Exception e) {
2210                 e.printStackTrace();
2211             }
2212         }
2213         return(resultList);
2214     }
2215 
2216     public AttributeList setAttributes(AttributeList attributes) {
2217         return new AttributeList(); // always empty
2218     }
2219 
2220     public Object invoke(final String operationName, final Object[] params,
2221         final String[] signature)
2222     throws ReflectionException {
2223         if (operationName == null) {
2224             throw new RuntimeOperationsException(
2225                 new IllegalArgumentException("Operation name cannot be null"),
2226                 "Cannot call invoke with null operation name");
2227         }
2228         // INFO logging of JMX invokes: [#HER-907]
2229         if (logger.isLoggable(Level.INFO)) {
2230             // String paramsString = "";
2231             StringBuilder buf = new StringBuilder();
2232             for (Object o : params) {
2233                 buf.append("\"" + o + "\", ");
2234             }
2235             logger.info("JMX invoke: " + operationName + "(" + buf + ")");
2236         } 
2237         // The pattern in the below is to match an operation and when found
2238         // do a return out of if clause.  Doing it this way, I can fall
2239         // on to the MethodNotFoundException for case where we've an
2240         // attribute but no handler.
2241         if (operationName.equals(START_OPER)) {
2242             JmxUtils.checkParamsCount(START_OPER, params, 0);
2243             start();
2244             return null;
2245         }
2246         if (operationName.equals(STOP_OPER)) {
2247             JmxUtils.checkParamsCount(STOP_OPER, params, 0);
2248             stop();
2249             return null;
2250         }
2251         if (operationName.equals(DESTROY_OPER)) {
2252             JmxUtils.checkParamsCount(DESTROY_OPER, params, 0);
2253             destroy();
2254             return null;
2255         }
2256         if (operationName.equals(TERMINATE_CRAWL_JOB_OPER)) {
2257             JmxUtils.checkParamsCount(TERMINATE_CRAWL_JOB_OPER, params, 0);
2258             return new Boolean(this.jobHandler.terminateCurrentJob());
2259         }
2260         if (operationName.equals(REBIND_JNDI_OPER)) {
2261             JmxUtils.checkParamsCount(REBIND_JNDI_OPER, params, 0);
2262             try {
2263 				registerContainerJndi();
2264 			} catch (MalformedObjectNameException e) {
2265 				throw new RuntimeOperationsException(new RuntimeException(e));
2266 			} catch (UnknownHostException e) {
2267 				throw new RuntimeOperationsException(new RuntimeException(e));
2268 			} catch (NamingException e) {
2269 				throw new RuntimeOperationsException(new RuntimeException(e));
2270 			}
2271             return null;
2272         }
2273         if (operationName.equals(SHUTDOWN_OPER)) {
2274             JmxUtils.checkParamsCount(SHUTDOWN_OPER, params, 0);
2275             Heritrix.shutdown();
2276             return null;
2277         }
2278         if (operationName.equals(LOG_OPER)) {
2279             JmxUtils.checkParamsCount(LOG_OPER, params, 2);
2280             logger.log(Level.parse((String)params[0]), (String)params[1]);
2281             return null;
2282         }
2283         if (operationName.equals(INTERRUPT_OPER)) {
2284             JmxUtils.checkParamsCount(INTERRUPT_OPER, params, 1);
2285             return interrupt((String)params[0]);
2286         }       
2287         if (operationName.equals(START_CRAWLING_OPER)) {
2288             JmxUtils.checkParamsCount(START_CRAWLING_OPER, params, 0);
2289             startCrawling();
2290             return null;
2291         }
2292         if (operationName.equals(STOP_CRAWLING_OPER)) {
2293             JmxUtils.checkParamsCount(STOP_CRAWLING_OPER, params, 0);
2294             stopCrawling();
2295             return null;
2296         }
2297         if (operationName.equals(ADD_CRAWL_JOB_OPER)) {
2298             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_OPER, params, 4);
2299             try {
2300                 return addCrawlJob((String)params[0], (String)params[1],
2301                     checkForEmptyPlaceHolder((String)params[2]),
2302                     checkForEmptyPlaceHolder((String)params[3]));
2303             } catch (IOException e) {
2304                 throw new RuntimeOperationsException(new RuntimeException(e));
2305             } catch (FatalConfigurationException e) {
2306                 throw new RuntimeOperationsException(new RuntimeException(e));
2307             }
2308         }
2309         if (operationName.equals(DELETE_CRAWL_JOB_OPER)) {
2310             JmxUtils.checkParamsCount(DELETE_CRAWL_JOB_OPER, params, 1);
2311             this.jobHandler.deleteJob((String)params[0]);
2312             return null;
2313         }
2314         
2315         if (operationName.equals(ADD_CRAWL_JOB_BASEDON_OPER)) {
2316             JmxUtils.checkParamsCount(ADD_CRAWL_JOB_BASEDON_OPER, params, 4);
2317             return addCrawlJobBasedOn((String)params[0], (String)params[1],
2318                     checkForEmptyPlaceHolder((String)params[2]),
2319                     checkForEmptyPlaceHolder((String)params[3]));
2320         }       
2321         if (operationName.equals(ALERT_OPER)) {
2322             JmxUtils.checkParamsCount(ALERT_OPER, params, 1);
2323             SinkHandlerLogRecord slr = null;
2324             if (this.alertManager.getCount() > 0) {
2325                 // This is creating a vector of all alerts just so I can then
2326                 // use passed index into resultant vector -- needs to be
2327                 // improved.
2328                 slr = (SinkHandlerLogRecord)this.alertManager.getAll().
2329                     get(((Integer)params[0]).intValue());
2330             }
2331             return (slr != null)? slr.toString(): null;
2332         }
2333         
2334         if (operationName.equals(PENDING_JOBS_OPER)) {
2335                 JmxUtils.checkParamsCount(PENDING_JOBS_OPER, params, 0);
2336             try {
2337                 return makeJobsTabularData(getJobHandler().getPendingJobs());
2338             } catch (OpenDataException e) {
2339                 throw new RuntimeOperationsException(new RuntimeException(e));
2340             }
2341         }
2342         
2343         if (operationName.equals(COMPLETED_JOBS_OPER)) {
2344                 JmxUtils.checkParamsCount(COMPLETED_JOBS_OPER, params, 0);
2345             try {
2346                 return makeJobsTabularData(getJobHandler().getCompletedJobs());
2347             } catch (OpenDataException e) {
2348                 throw new RuntimeOperationsException(new RuntimeException(e));
2349             }
2350         }
2351         
2352         if (operationName.equals(CRAWLEND_REPORT_OPER)) {
2353             JmxUtils.checkParamsCount(CRAWLEND_REPORT_OPER, params, 2);
2354             try {
2355                 return getCrawlendReport((String)params[0], (String) params[1]);
2356             } catch (IOException e) {
2357                 throw new RuntimeOperationsException(new RuntimeException(e));
2358             }
2359         }
2360         
2361         throw new ReflectionException(
2362             new NoSuchMethodException(operationName),
2363                 "Cannot find the operation " + operationName);
2364     }
2365     
2366     /***
2367      * Return named crawl end report for job with passed uid.
2368      * Crawler makes reports when its finished its crawl.  Use this method
2369      * to get a String version of one of these files.
2370      * @param jobUid The unique ID for the job whose reports you want to see
2371      * (Must be a completed job).
2372      * @param reportName Name of report minus '.txt' (e.g. crawl-report).
2373      * @return String version of the on-disk report.
2374      * @throws IOException 
2375      */
2376     protected String getCrawlendReport(String jobUid, String reportName)
2377     throws IOException {
2378         CrawlJob job = getJobHandler().getJob(jobUid);
2379         if (job == null) {
2380             throw new IOException("No such job: " + jobUid);
2381         }
2382         File report = new File(job.getDirectory(), reportName + ".txt");
2383         if (!report.exists()) {
2384             throw new FileNotFoundException(report.getAbsolutePath());
2385         }
2386         return FileUtils.readFileAsString(report);
2387     }
2388     
2389     protected TabularData makeJobsTabularData(List jobs)
2390     throws OpenDataException {
2391         if (jobs == null || jobs.size() == 0) {
2392             return null;
2393         }
2394         TabularData td = new TabularDataSupport(this.jobsTabularType);
2395         for (Iterator i = jobs.iterator(); i.hasNext();) {
2396             CrawlJob job = (CrawlJob)i.next();
2397             CompositeData cd = new CompositeDataSupport(this.jobCompositeType,
2398                 JOB_KEYS,
2399                 new String [] {job.getUID(), job.getJobName(), job.getStatus()});
2400             td.put(cd);
2401         }
2402         return td;
2403     }
2404     
2405     /***
2406      * If passed str has placeholder for the empty string, return the empty
2407      * string else return orginal.
2408      * Dumb jmx clients can't pass empty string so they'll pass a representation
2409      * of empty string such as ' ' or '-'.  Convert such strings to empty
2410      * string.
2411      * @param str String to check.
2412      * @return Original <code>str</code> or empty string if <code>str</code>
2413      * contains a placeholder for the empty-string (e.g. '-', or ' ').
2414      */
2415     protected String checkForEmptyPlaceHolder(String str) {
2416         return TextUtils.matches("-| +", str)? "": str;
2417     }
2418 
2419     public MBeanInfo getMBeanInfo() {
2420         return this.openMBeanInfo;
2421     }
2422     
2423     /***
2424      * @return Name this instance registered in JMX (Only available after JMX
2425      * registration).
2426      */
2427     public ObjectName getMBeanName() {
2428         return this.mbeanName;
2429     }
2430 
2431     public ObjectName preRegister(MBeanServer server, ObjectName name)
2432     throws Exception {
2433         this.mbeanServer = server;
2434         @SuppressWarnings("unchecked")
2435         Hashtable<String,String> ht = name.getKeyPropertyList();
2436         if (!ht.containsKey(JmxUtils.NAME)) {
2437             throw new IllegalArgumentException("Name property required" +
2438                 name.getCanonicalName());
2439         }
2440         if (!ht.containsKey(JmxUtils.TYPE)) {
2441             ht.put(JmxUtils.TYPE, JmxUtils.SERVICE);
2442             name = new ObjectName(name.getDomain(), ht);
2443         }
2444         this.mbeanName = addGuiPort(addVitals(name));
2445         Heritrix.instances.put(this.mbeanName.
2446             getCanonicalKeyPropertyListString(), this);
2447         return this.mbeanName;
2448     }
2449     
2450     /***
2451      * Add vital stats to passed in ObjectName.
2452      * @param name ObjectName to add to.
2453      * @return name with host, guiport, and jmxport added.
2454      * @throws UnknownHostException
2455      * @throws MalformedObjectNameException
2456      * @throws NullPointerException
2457      */
2458     protected static ObjectName addVitals(ObjectName name)
2459     throws UnknownHostException, MalformedObjectNameException,
2460     NullPointerException {
2461         @SuppressWarnings("unchecked")
2462         Hashtable<String,String> ht = name.getKeyPropertyList();
2463         if (!ht.containsKey(JmxUtils.HOST)) {
2464             ht.put(JmxUtils.HOST, InetAddress.getLocalHost().getCanonicalHostName());
2465             name = new ObjectName(name.getDomain(), ht);
2466         }
2467         if (!ht.containsKey(JmxUtils.JMX_PORT)) {
2468             // Add jdk jmx-port. This will be present if we've attached
2469             // ourselves to the jdk jmx agent.  Otherwise, we've been
2470             // deployed in a j2ee container with its own jmx agent.  In
2471             // this case we won't know how to get jmx port.
2472             String p = System.getProperty("com.sun.management.jmxremote.port");
2473             if (p != null && p.length() > 0) {
2474                 ht.put(JmxUtils.JMX_PORT, p);
2475                 name = new ObjectName(name.getDomain(), ht);
2476             }
2477         }
2478         return name;
2479     }
2480     
2481     protected static ObjectName addGuiPort(ObjectName name)
2482     throws MalformedObjectNameException, NullPointerException {
2483         @SuppressWarnings("unchecked")
2484         Hashtable<String,String> ht = name.getKeyPropertyList();
2485         if (!ht.containsKey(JmxUtils.GUI_PORT)) {
2486             // Add gui port if this instance was started with a gui.
2487             if (Heritrix.gui) {
2488                 ht.put(JmxUtils.GUI_PORT, Integer.toString(Heritrix.guiPort));
2489                 name = new ObjectName(name.getDomain(), ht);
2490             }
2491         }
2492         return name;
2493     }
2494 
2495     public void postRegister(Boolean registrationDone) {
2496         if (logger.isLoggable(Level.INFO)) {
2497             logger.info(
2498                 JmxUtils.getLogRegistrationMsg(this.mbeanName.getCanonicalName(),
2499                 this.mbeanServer, registrationDone.booleanValue()));
2500         }
2501         try {
2502             registerJndi(this.mbeanName);
2503         } catch (Exception e) {
2504             logger.log(Level.SEVERE, "Failed jndi registration", e);
2505         }
2506     }
2507 
2508     public void preDeregister() throws Exception {
2509         deregisterJndi(this.mbeanName);
2510     }
2511 
2512     public void postDeregister() {
2513         Heritrix.instances.
2514             remove(this.mbeanName.getCanonicalKeyPropertyListString());
2515         if (logger.isLoggable(Level.INFO)) {
2516             logger.info(JmxUtils.getLogUnregistrationMsg(
2517                     this.mbeanName.getCanonicalName(), this.mbeanServer));
2518         }
2519     }
2520     
2521     protected static void registerContainerJndi()
2522     throws MalformedObjectNameException, NullPointerException,
2523     		UnknownHostException, NamingException {
2524     	registerJndi(getJndiContainerName());
2525     }
2526 
2527     protected static void registerJndi(final ObjectName name)
2528     throws NullPointerException, NamingException {
2529     	Context c = getJndiContext();
2530     	if (c == null) {
2531     		return;
2532     	}
2533         CompoundName key = JndiUtils.bindObjectName(c, name);
2534         if (logger.isLoggable(Level.FINE)) {
2535             logger.fine("Bound '"  + key + "' to '" + JndiUtils.
2536                getCompoundName(c.getNameInNamespace()).toString()
2537                + "' jndi context");
2538         }
2539     }
2540     
2541     protected static void deregisterJndi(final ObjectName name)
2542     throws NullPointerException, NamingException {
2543     	Context c = getJndiContext();
2544     	if (c == null) {
2545     		return;
2546     	}
2547         CompoundName key = JndiUtils.unbindObjectName(c, name);
2548         if (logger.isLoggable(Level.FINE)) {
2549             logger.fine("Unbound '" + key + "' from '" +
2550                 JndiUtils.getCompoundName(c.getNameInNamespace()).toString() +
2551                 	"' jndi context");
2552         }
2553     }
2554     
2555     /***
2556      * @return Jndi context for the crawler or null if none found.
2557      * @throws NamingException 
2558      */
2559     protected static Context getJndiContext() throws NamingException {
2560     	Context c = null;
2561     	try {
2562     		c = JndiUtils.getSubContext(CRAWLER_PACKAGE);
2563     	} catch (NoInitialContextException e) {
2564     		logger.fine("No JNDI Context: " + e.toString());
2565     	}
2566     	return c;
2567     }
2568     
2569     /***
2570      * @return Jndi container name -- the name to use for the 'container' that
2571      * can host zero or more heritrix instances (Return a JMX ObjectName.  We
2572      * use ObjectName because then we're sync'd with JMX naming and ObjectName
2573      * has nice parsing).
2574      * @throws NullPointerException 
2575      * @throws MalformedObjectNameException 
2576      * @throws UnknownHostException 
2577      */
2578     protected static ObjectName getJndiContainerName()
2579     throws MalformedObjectNameException, NullPointerException,
2580     UnknownHostException {
2581         ObjectName objName = new ObjectName(CRAWLER_PACKAGE, "type",
2582             "container");
2583         return addVitals(objName);
2584     }
2585     
2586     /***
2587      * @return Return all registered instances of Heritrix (Rare are there 
2588      * more than one).
2589      */
2590     public static Map getInstances() {
2591         return Heritrix.instances;
2592     }
2593     
2594     /***
2595      * @return True if only one instance of Heritrix.
2596      */
2597     public static boolean isSingleInstance() {
2598         return Heritrix.instances != null && Heritrix.instances.size() == 1;
2599     }
2600     
2601     /***
2602      * @return Returns single instance or null if no instance or multiple.
2603      */
2604     public static Heritrix getSingleInstance() {
2605         return !isSingleInstance()?
2606             null:
2607             (Heritrix)Heritrix.instances.
2608                 get(Heritrix.instances.keySet().iterator().next());
2609     }
2610 }