/* CrawlJobHandler
 *
 * $Id: CrawlJobHandler.java 6974 2010-10-26 00:01:53Z nlevitt $
 *
 * Copyright (C) 2003 Internet Archive.
 *
 * This file is part of the Heritrix web crawler (crawler.archive.org).
 *
 * Heritrix is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or
 * any later version.
 *
 * Heritrix is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser Public License for more details.
 *
 * You should have received a copy of the GNU Lesser Public License
 * along with Heritrix; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
package org.archive.crawler.admin;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URL;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import java.util.logging.Level;
import java.util.logging.Logger;

import javax.management.Attribute;
import javax.management.AttributeNotFoundException;
import javax.management.InvalidAttributeValueException;
import javax.management.MBeanException;
import javax.management.ReflectionException;

import org.apache.commons.httpclient.URIException;
import org.archive.crawler.Heritrix;
import org.archive.crawler.datamodel.CrawlOrder;
import org.archive.crawler.event.CrawlStatusListener;
import org.archive.crawler.framework.FrontierMarker;
import org.archive.crawler.framework.exceptions.FatalConfigurationException;
import org.archive.crawler.framework.exceptions.InitializationException;
import org.archive.crawler.framework.exceptions.InvalidFrontierMarkerException;
import org.archive.crawler.frontier.FrontierJournal;
import org.archive.crawler.settings.ComplexType;
import org.archive.crawler.settings.CrawlerSettings;
import org.archive.crawler.settings.SettingsHandler;
import org.archive.crawler.settings.XMLSettingsHandler;
import org.archive.util.ArchiveUtils;
import org.archive.util.FileUtils;

/**
 * This class manages CrawlJobs. Submitted crawl jobs are queued up and run
 * in order when the crawler is running.
 * <p>Basically this provides a layer between any potential user interface and
 * the CrawlJobs.  It keeps the lists of completed jobs, pending jobs, etc.
 * <p>
 * The jobs managed by the handler can be divided into the following:
 * <ul>
 *  <li> <code>Pending</code> - Jobs that are ready to run and are waiting their
 *                              turn. These can be edited, viewed, deleted etc.
 *  <li> <code>Running</code> - Only one job can be running at a time. There may
 *                              be no job running. The running job can be viewed
 *                              and edited to some extent. It can also be
 *                              terminated. This job should have a
 *                              StatisticsTracking module attached to it for more
 *                              details on the crawl.
 *  <li> <code>Completed</code> - Jobs that have finished crawling or have been
 *                              deleted from the pending queue or terminated
 *                              while running. They cannot be edited but can be
 *                              viewed. They retain the StatisticsTracking
 *                              module from their run.
 *  <li> <code>New job</code> - At any given time there can be one 'new job'.
 *                              The new job is not considered ready to run. It
 *                              can be edited or discarded (in which case it
 *                              will be totally destroyed, including any files
 *                              on disk). Once an operator deems the job ready
 *                              to run it can be moved to the pending queue.
 *  <li> <code>Profiles</code> - Jobs under profiles are not actual jobs. They
 *                              can be edited normally but cannot be submitted
 *                              to the pending queue. New jobs can be created
 *                              using a profile as their template.
 * </ul>
 *
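 * <p>A minimal usage sketch (illustrative only; the jobs directory path,
 * job name, seed, and use of CrawlJob.PRIORITY_AVERAGE are assumptions,
 * and exception handling is omitted):
 * <pre>
 * CrawlJobHandler handler = new CrawlJobHandler(new File("jobs"));
 * CrawlJob profile = handler.getDefaultProfile();
 * CrawlJob job = handler.newJob(profile, null, "myjob",
 *     "A test crawl", "http://example.com/", CrawlJob.PRIORITY_AVERAGE);
 * handler.addJob(job);      // move the 'new job' onto the pending queue
 * handler.startCrawler();   // begin crawling pending jobs in order
 * </pre>
 *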
 * @author Kristinn Sigurdsson
 *
 * @see org.archive.crawler.admin.CrawlJob
 */
public class CrawlJobHandler implements CrawlStatusListener {
    private static final Logger logger =
        Logger.getLogger(CrawlJobHandler.class.getName());

    /**
     * Name of system property whose specification overrides default profile
     * used.
     */
    public static final String DEFAULT_PROFILE_NAME
        = "heritrix.default.profile";

    /**
     * Default profile name.
     */
    public static final String DEFAULT_PROFILE = "default";

    /**
     * Name of the profiles directory.
     */
    public static final String PROFILES_DIR_NAME = "profiles";

    public static final String ORDER_FILE_NAME = "order.xml";

    /**
     * Job currently being crawled.
     */
    private CrawlJob currentJob = null;

    /**
     * A new job that is being created/configured. Not yet ready for crawling.
     */
    private CrawlJob newJob = null;

    /**
     * Thread to start the next job in background.
     */
    private Thread startingNextJob = null;

    /**
     * A list of pending CrawlJobs.
     */
    private TreeSet<CrawlJob> pendingCrawlJobs;

    /**
     * A list of completed CrawlJobs.
     */
    //private Vector completedCrawlJobs = new Vector();
    private TreeSet<CrawlJob> completedCrawlJobs;

    /**
     * A list of profile CrawlJobs.
     */
    private TreeSet<CrawlJob> profileJobs;

    // The UIDs of profiles should NOT be timestamps. A descriptive name is
    // ideal.
    private String defaultProfile = null;

    /**
     * If true the crawler is 'running'. That is, the next pending job will
     * start crawling as soon as the current job (if any) is completed.
     */
    private boolean running = false;

    /**
     * String to indicate recovery should be based on the recovery log, not
     * based on checkpointing.
     */
    public static final String RECOVER_LOG = "recover";

    /**
     * Jobs directory.
     */
    private final File jobsDir;

    /**
     * Constructor.
     * @param jobsDir Jobs directory.
     */
    public CrawlJobHandler(final File jobsDir) {
        this(jobsDir, true, true);
    }

    /**
     * Constructor allowing for optional loading of profiles and jobs.
     * @param jobsDir Jobs directory.
     * @param loadJobs If true then any applicable jobs will be loaded.
     * @param loadProfiles If true then any applicable profiles will be loaded.
     */
    public CrawlJobHandler(final File jobsDir,
            final boolean loadJobs, final boolean loadProfiles) {
        this.jobsDir = jobsDir;
        // Make a comparator for CrawlJobs.
        Comparator<CrawlJob> comp = new Comparator<CrawlJob>() {
            public int compare(CrawlJob job1, CrawlJob job2) {
                if (job1.getJobPriority() < job2.getJobPriority()) {
                    return -1;
                } else if (job1.getJobPriority() > job2.getJobPriority()) {
                    return 1;
                } else {
                    // Same priority, use UID (which should be a timestamp).
                    // Lower UID (string compare) means earlier time.
                    return job1.getUID().compareTo(job2.getUID());
                }
            }
        };
        this.pendingCrawlJobs = new TreeSet<CrawlJob>(comp);
        this.completedCrawlJobs = new TreeSet<CrawlJob>(comp);
        // Profiles always have the same priority so they will be sorted
        // by name.
        this.profileJobs = new TreeSet<CrawlJob>(comp);
        if (loadProfiles) {
            loadProfiles();
        }
        if (loadJobs) {
            loadJobs();
        }
    }

    /**
     * Find the state.job file in the job directory.
     * @param jobDir Directory to look in.
     * @return Full path to 'state.job' file or null if none found.
     */
    protected File getStateJobFile(final File jobDir) {
        // Need to find job file ('state.job').
        File[] jobFiles = jobDir.listFiles(new FilenameFilter() {
            public boolean accept(File dir, String name) {
                return name.toLowerCase().endsWith(".job") &&
                    (new File(dir, name)).canRead();
            }
        });
        // listFiles returns null if jobDir does not exist or is not a
        // directory; treat that the same as 'no job file found'.
        return (jobFiles != null && jobFiles.length == 1)? jobFiles[0]: null;
    }

    /**
     * Loads any available jobs in the jobs directory.
     * <p>
     * Available jobs are any directory containing a file called
     * <code>state.job</code>. The file must contain valid job information.
     */
    private void loadJobs() {
        this.jobsDir.mkdirs();
        File[] jobs = this.jobsDir.listFiles();
        for (int i = 0; i < jobs.length; i++) {
            if (jobs[i].isDirectory()) {
                File jobFile = getStateJobFile(jobs[i]);
                if (jobFile != null) {
                    loadJob(jobFile);
                }
            }
        }
    }

    /**
     * Loads a job given a specific job file. The loaded job will be placed in
     * the list of completed jobs or pending queue depending on its status.
     * Running jobs will have their status set to 'finished abnormally' and put
     * into the completed list.
     * @param job The job file of the job to load.
     */
    protected void loadJob(final File job) {
        CrawlJob cjob = null;
        try {
            // Load the CrawlJob.
            cjob = new CrawlJob(job, new CrawlJobErrorHandler());
        } catch (InvalidJobFileException e) {
            logger.log(Level.INFO,
                    "Invalid job file for " + job.getAbsolutePath(), e);
            return;
        } catch (IOException e) {
            logger.log(Level.INFO, "IOException for " + job.getName() +
                    ", " + job.getAbsolutePath(), e);
            return;
        }

        // TODO: Move test into CrawlJob.
        // Check job status and place it accordingly.
        if (cjob.getStatus().equals(CrawlJob.STATUS_RUNNING)
                || cjob.getStatus().equals(CrawlJob.STATUS_PAUSED)
                || cjob.getStatus().equals(CrawlJob.STATUS_CHECKPOINTING)
                || cjob.getStatus().equals(CrawlJob.STATUS_WAITING_FOR_PAUSE)) {
            // Was a running job.
            cjob.setStatus(CrawlJob.STATUS_FINISHED_ABNORMAL);
            this.completedCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_PENDING)) {
            // Was a pending job.
            this.pendingCrawlJobs.add(cjob);
        } else if (cjob.getStatus().equals(CrawlJob.STATUS_CREATED)
                || cjob.getStatus().equals(CrawlJob.STATUS_DELETED)) {
            // Ignore for now. TODO: Add to 'recycle bin'.
        } else {
            // Must have been completed.
            this.completedCrawlJobs.add(cjob);
        }
    }

    /**
     * Looks in conf dir for a profiles dir.
     * @return The directory where profiles are stored, or null if none
     * available.
     * @throws IOException
     */
    private File getProfilesDirectory() throws IOException {
        URL webappProfilePath = Heritrix.class.getResource("/" +
            PROFILES_DIR_NAME);
        if (webappProfilePath != null) {
            try {
                return new File(new URI(webappProfilePath.toString()));
            } catch (java.lang.IllegalArgumentException e) {
                // e.g. "profiles" within a jar file.
                // Fall through and try Heritrix.getConfdir() instead.
            } catch (java.net.URISyntaxException e) {
                e.printStackTrace();
            }
        }
        return (Heritrix.getConfdir(false) == null)? null:
            new File(Heritrix.getConfdir().getAbsolutePath(),
                PROFILES_DIR_NAME);
    }

    /**
     * Loads the default profile and all other profiles found on disk.
     */
    private void loadProfiles() {
        boolean loadedDefault = false;
        File profileDir = null;
        try {
            profileDir = getProfilesDirectory();
        } catch (IOException e) {
            e.printStackTrace();
        }
        if (profileDir != null) {
            File[] ps = profileDir.listFiles();
            if (ps != null && ps.length > 0) {
                for (int i = 0; i < ps.length; i++) {
                    File f = ps[i];
                    if (f.isDirectory()) {
                        // Each directory in the profiles directory should
                        // contain the file order.xml.
                        File profile = new File(f, ORDER_FILE_NAME);
                        if (profile.canRead()) {
                            boolean b = loadProfile(profile);
                            if (b) {
                                loadedDefault = b;
                            }
                        }
                    }
                }
            }
        }
        // Now add in the default profile.  It's on the CLASSPATH and needs
        // special handling.  Don't add if a default is already present.
        String parent = File.separator + PROFILES_DIR_NAME + File.separator;
        if (!loadedDefault) {
            loadProfile(new File(parent + DEFAULT_PROFILE, ORDER_FILE_NAME));
        }
        // Look to see if a default profile system property has been
        // supplied. If so, use it instead.
        // TODO: Try and read default profile from some permanent storage.
        defaultProfile = DEFAULT_PROFILE;
    }

    /**
     * Load one profile.
     * @param profile Profile to load.
     * @return True if the loaded profile was the default profile.
     */
    protected boolean loadProfile(File profile) {
        boolean loadedDefault = false;
        // Ok, got the order file for this profile.
        try {
            // The directory name denotes the profile's UID and name.
            XMLSettingsHandler newSettingsHandler =
                new XMLSettingsHandler(profile);
            CrawlJobErrorHandler cjseh =
                new CrawlJobErrorHandler(Level.SEVERE);
            newSettingsHandler.
                setErrorReportingLevel(cjseh.getLevel());
            newSettingsHandler.initialize();
            addProfile(new CrawlJob(profile.getParentFile().getName(),
                newSettingsHandler, cjseh));
            loadedDefault = profile.getParentFile().getName().
                equals(DEFAULT_PROFILE);
        } catch (InvalidAttributeValueException e) {
            System.err.println("Failed to load profile '" +
                    profile.getParentFile().getName() +
                    "'. InvalidAttributeValueException.");
        }
        return loadedDefault;
    }

    /**
     * Add a new profile.
     * @param profile The new profile.
     */
    public synchronized void addProfile(CrawlJob profile) {
        profileJobs.add(profile);
    }

    public synchronized void deleteProfile(CrawlJob cj) throws IOException {
        File d = getProfilesDirectory();
        File p = new File(d, cj.getJobName());
        if (!p.exists()) {
            throw new IOException("No profile named " + cj.getJobName() +
                " at " + d.getAbsolutePath());
        }
        FileUtils.deleteDir(p);
        this.profileJobs.remove(cj);
    }

    /**
     * Returns a List of all known profiles.
     * @return A List of all known profiles.
     */
    public synchronized List<CrawlJob> getProfiles() {
        ArrayList<CrawlJob> tmp = new ArrayList<CrawlJob>(profileJobs.size());
        tmp.addAll(profileJobs);
        return tmp;
    }

    /**
     * Submit a job to the handler. The job will be scheduled for crawling. At
     * present it will not take the job's priority into consideration.
     *
     * @param job A new job for the handler.
     * @return The CrawlJob that was added, or null.
     */
    public CrawlJob addJob(CrawlJob job) {
        if (job.isProfile()) {
            return null;     // Can't crawl profiles.
        }
        job.setStatus(CrawlJob.STATUS_PENDING);
        if (job.isNew()) {
            // We're moving the 'new job' onto the pending queue.
            this.newJob = null;
            job.setNew(false);
        }
        this.pendingCrawlJobs.add(job);
        if (!isCrawling() && isRunning()) {
            // Start crawling.
            startNextJob();
        }
        return job;
    }

    /**
     * Returns the default profile. If no default profile has been set it will
     * return the first profile that was set/loaded and still exists. If no
     * profiles exist it will return null.
     * @return The default profile.
     */
    public synchronized CrawlJob getDefaultProfile() {
        if (defaultProfile != null) {
            for (CrawlJob item: profileJobs) {
                if (item.getJobName().equals(defaultProfile)) {
                    // Found it.
                    return item;
                }
            }
        }
        if (profileJobs.size() > 0) {
            return profileJobs.first();
        }
        return null;
    }

    /**
     * Set the default profile.
     * @param profile The new default profile. The following must apply to it:
     *                profile.isProfile() should return true and
     *                this.getProfiles() should contain it.
     */
    public void setDefaultProfile(CrawlJob profile) {
        defaultProfile = profile.getJobName();
        // TODO: Make changes to default profile durable across restarts.
    }

    /**
     * A List of all pending jobs.
     *
     * @return A List of all pending jobs.
     * No promises are made about the order of the list.
     */
    public List<CrawlJob> getPendingJobs() {
        ArrayList<CrawlJob> tmp
         = new ArrayList<CrawlJob>(pendingCrawlJobs.size());
        tmp.addAll(pendingCrawlJobs);
        return tmp;
    }

    /**
     * @return The job currently being crawled.
     */
    public CrawlJob getCurrentJob() {
        return currentJob;
    }

    /**
     * @return A List of all finished jobs.
     */
    public List<CrawlJob> getCompletedJobs() {
        ArrayList<CrawlJob> tmp
         = new ArrayList<CrawlJob>(completedCrawlJobs.size());
        tmp.addAll(completedCrawlJobs);
        return tmp;
    }

    /**
     * Return a job with the given UID.
     * Doesn't matter if it's pending, currently running, has finished running,
     * is new, or is a profile.
     *
     * @param jobUID The unique ID of the job.
     * @return The job with the UID, or null if no such job is found.
     */
    public CrawlJob getJob(String jobUID) {
        if (jobUID == null) {
            return null; // UID can't be null.
        }
        // First check the currently running job.
        if (currentJob != null && currentJob.getUID().equals(jobUID)) {
            return currentJob;
        } else if (newJob != null && newJob.getUID().equals(jobUID)) {
            // Then check the 'new job'.
            return newJob;
        } else {
            // Then check pending jobs.
            for (CrawlJob cj: pendingCrawlJobs) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }

            // Next check completed jobs.
            for (CrawlJob cj: completedCrawlJobs) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }

            // And finally check the profiles.
            for (CrawlJob cj: getProfiles()) {
                if (cj.getUID().equals(jobUID)) {
                    return cj;
                }
            }
        }
        return null; // Nothing found, return null.
    }

    /**
     * @return True if we terminated a current job (false if there was no job
     * to terminate).
     */
    public boolean terminateCurrentJob() {
        if (this.currentJob == null) {
            return false;
        }

        if (startingNextJob != null) {
            try {
                startingNextJob.join();
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
        }

        // requestCrawlStop will cause crawlEnding to be invoked.
        // It will handle the clean up.
        this.currentJob.stopCrawling();
        synchronized (this) {
            try {
                // Take a few moments so that the controller can change
                // states before the UI updates. The CrawlEnding event
                // will wake us if it occurs sooner than this.
                wait(3000);
            } catch (InterruptedException e) {
                // Ignore.
            }
        }
        return true;
    }

    /**
     * The specified job will be removed from the pending queue or aborted if
     * currently running.  It will be placed in the list of completed jobs with
     * appropriate status info. If the job is already in the completed list or
     * no job with the given UID is found, no action will be taken.
     *
     * @param jobUID The UID (unique ID) of the job that is to be deleted.
     */
    public void deleteJob(String jobUID) {
        // First check to see if we are deleting the current job.
        if (currentJob != null && jobUID.equals(currentJob.getUID())) {
            terminateCurrentJob();
            return; // We're not going to find another job with the same UID.
        }

        // Ok, it isn't the current job, let's check the pending jobs.
        for (Iterator<CrawlJob> it = pendingCrawlJobs.iterator(); it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                // Found the one to delete.
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return; // We're not going to find another job with the same UID.
            }
        }

        // And finally the completed jobs.
        for (Iterator<CrawlJob> it = completedCrawlJobs.iterator(); it.hasNext();) {
            CrawlJob cj = it.next();
            if (cj.getUID().equals(jobUID)) {
                // Found the one to delete.
                cj.setStatus(CrawlJob.STATUS_DELETED);
                it.remove();
                return; // No other job will have the same UID.
            }
        }
    }

    /**
     * Cause the current job to pause. If no current job is crawling this
     * method will have no effect.
     */
    public void pauseJob() {
        if (this.currentJob != null) {
            this.currentJob.pause();
        }
    }

    /**
     * Cause the current job to resume crawling if it was paused. Will have no
     * effect if the current job was not paused or if there is no current job.
     * If the current job is still waiting to pause, this will not take effect
     * until the job has actually paused, at which time it will immediately
     * resume crawling.
     */
    public void resumeJob() {
        if (this.currentJob != null) {
            this.currentJob.resume();
        }
    }

    /**
     * Cause the current job to write a checkpoint to disk. Currently
     * requires the job to already be paused.
     * @throws IllegalStateException Thrown if the crawl is not paused.
     */
    public void checkpointJob() throws IllegalStateException {
        if (this.currentJob != null) {
            this.currentJob.checkpoint();
        }
    }

    /**
     * Returns a unique job ID.
     * <p>
     * No two calls to this method (on the same instance of this class) can
     * ever return the same value. <br>
     * Currently implemented to return a time stamp. That is subject to change
     * though.
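     * For example, a UID generated at 2010-10-26 00:01:53.000 would read
     * <code>20101026000153000</code> (assuming the 17-digit
     * yyyyMMddHHmmssSSS layout of {@link ArchiveUtils#TIMESTAMP17}; see
     * that constant for the authoritative format).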
     *
     * @return A unique job ID.
     *
     * @see ArchiveUtils#TIMESTAMP17
     */
    public String getNextJobUID() {
        return ArchiveUtils.get17DigitDate();
    }

    /**
     * Creates a new job. The new job will be returned and also registered as
     * the handler's 'new job'. The new job will be based on the settings
     * provided but created in a new location on disk.
     *
     * @param baseOn
     *            A CrawlJob (with a valid settingshandler) to use as the
     *            template for the new job.
     * @param recovery Whether to preinitialize the new job as a recovery of
     * the <code>baseOn</code> job.  The String holds RECOVER_LOG if we are to
     * do the recovery based off the recover.gz log -- see RecoveryJournal in
     * the frontier package -- or it holds the name of
     * the checkpoint we're to use recovering.
     * @param name
     *            The name of the new job.
     * @param description
     *            Description of the job.
     * @param seeds
     *            The contents of the new settings' seed file.
     * @param priority
     *            The priority of the new job.
     *
     * @return The new crawl job.
     * @throws FatalConfigurationException If a problem occurs creating the
     *             settings.
     */
    public CrawlJob newJob(CrawlJob baseOn, String recovery, String name,
            String description, String seeds, int priority)
    throws FatalConfigurationException {
        // See what the recover story is.
        File recover = null;
        try {
            if (recovery != null && recovery.length() > 0
                    && recovery.equals(RECOVER_LOG)) {
                // Then we're to do a recovery based off the RecoveryJournal
                // recover.gz log.
                File dir = baseOn.getSettingsHandler().getOrder()
                    .getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
                // Add name of recover file.  We're hardcoding it as
                // 'recover.gz'.
                recover = new File(dir, FrontierJournal.LOGNAME_RECOVER);
            } else if (recovery != null && recovery.length() > 0) {
                // Must be the name of a checkpoint to use.
                recover = new File(baseOn.getSettingsHandler().
                    getOrder().getSettingsDir(CrawlOrder.ATTR_CHECKPOINTS_PATH),
                        recovery);
            }
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                "AttributeNotFoundException occurred while setting up " +
                    "new job/profile " + name + " \n" + e1.getMessage());
        }

        CrawlJob cj = createNewJob(baseOn.getSettingsHandler().getOrderFile(),
            name, description, seeds, priority);

        updateRecoveryPaths(recover, cj.getSettingsHandler(), name);

        return cj;
    }

    /**
     * Creates a new job. The new job will be returned and also registered as
     * the handler's 'new job'. The new job will be based on the settings
     * provided but created in a new location on disk.
     * @param orderFile Order file to use as the template for the new job.
     * @param name The name of the new job.
     * @param description Description of the job.
     * @param seeds The contents of the new settings' seed file.
     *
     * @return The new crawl job.
     * @throws FatalConfigurationException If a problem occurs creating the
     *             settings.
     */
    public CrawlJob newJob(final File orderFile, final String name,
        final String description, final String seeds)
    throws FatalConfigurationException {
        return createNewJob(orderFile, name, description, seeds,
            CrawlJob.PRIORITY_AVERAGE);
    }

    protected void checkDirectory(File dir)
    throws FatalConfigurationException {
        if (dir == null) {
            return;
        }
        // Use || rather than &&: the path is unusable if it is missing OR
        // unreadable (the previous && only ever caught the missing case).
        if (!dir.exists() || !dir.canRead()) {
            throw new FatalConfigurationException(dir.getAbsolutePath() +
                " does not exist or is unreadable");
        }
    }

    protected CrawlJob createNewJob(final File orderFile, final String name,
            final String description, final String seeds, final int priority)
    throws FatalConfigurationException {
        if (newJob != null) {
            // There already is a new job. Discard it.
            discardNewJob();
        }
        String UID = getNextJobUID();
        File jobDir = new File(this.jobsDir, name + "-" + UID);
        CrawlJobErrorHandler errorHandler = new CrawlJobErrorHandler();
        XMLSettingsHandler handler =
            createSettingsHandler(orderFile, name, description,
                seeds, jobDir, errorHandler, "order.xml", "seeds.txt");
        this.newJob = new CrawlJob(UID, name, handler, errorHandler, priority,
                jobDir);
        return this.newJob;
    }

    /**
     * Creates a new profile. The new profile will be returned and also
     * registered as the handler's 'new job'. The new profile will be based on
     * the settings provided but created in a new location on disk.
     *
     * @param baseOn
     *            A CrawlJob (with a valid settingshandler) to use as the
     *            template for the new profile.
     * @param name
     *            The name of the new profile.
     * @param description
     *            Description of the new profile.
     * @param seeds
     *            The contents of the new profile's seed file.
     * @return The new profile.
     * @throws FatalConfigurationException
     * @throws IOException
     */
    public CrawlJob newProfile(CrawlJob baseOn, String name, String description,
            String seeds)
    throws FatalConfigurationException, IOException {
        File profileDir = new File(getProfilesDirectory().getAbsoluteFile()
            + File.separator + name);
        CrawlJobErrorHandler cjseh = new CrawlJobErrorHandler(Level.SEVERE);
        CrawlJob newProfile = new CrawlJob(name,
            createSettingsHandler(baseOn.getSettingsHandler().getOrderFile(),
                name, description, seeds, profileDir, cjseh, "order.xml",
                "seeds.txt"), cjseh);
        addProfile(newProfile);
        return newProfile;
    }

    /**
     * Creates a new settings handler based on an existing job. Basically all
     * the settings files for the 'based on' job will be copied to the
     * specified directory.
     *
     * @param orderFile Order file to base the new order file on.  Cannot be
     *             null.
     * @param name Name for the new settings.
     * @param description Description of the new settings.
     * @param seeds The contents of the new settings' seed file.
     * @param newSettingsDir
     * @param errorHandler
     * @param filename Name of the new order file.
     * @param seedfile Name of the new seeds file.
     *
     * @return The new settings handler.
     * @throws FatalConfigurationException
     *             If there are problems with reading the 'based on'
     *             configuration, or with writing the new configuration or its
     *             seed file.
     */
    protected XMLSettingsHandler createSettingsHandler(
        final File orderFile, final String name, final String description,
        final String seeds, final File newSettingsDir,
        final CrawlJobErrorHandler errorHandler,
        final String filename, final String seedfile)
    throws FatalConfigurationException {
        XMLSettingsHandler newHandler = null;
        try {
            newHandler = new XMLSettingsHandler(orderFile);
            if (errorHandler != null) {
                newHandler.registerValueErrorHandler(errorHandler);
                // Dereference errorHandler only inside the null check so a
                // null error handler cannot cause an NPE here.
                newHandler.setErrorReportingLevel(errorHandler.getLevel());
            }
            newHandler.initialize();
        } catch (InvalidAttributeValueException e2) {
            throw new FatalConfigurationException(
                "InvalidAttributeValueException occurred while creating" +
                " new settings handler for new job/profile\n" +
                e2.getMessage());
        }

        // Make sure the directory exists.
        newSettingsDir.mkdirs();

        try {
            // Set the seed file.
            ((ComplexType)newHandler.getOrder().getAttribute("scope"))
                .setAttribute(new Attribute("seedsfile", seedfile));
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while setting up " +
                    "new job/profile\n" + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                    "InvalidAttributeValueException occurred while setting " +
                    "up new job/profile\n"  + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while setting up new" +
                    " job/profile\n" + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while setting up" +
                    " new job/profile\n" + e1.getMessage());
        }

        File newFile = new File(newSettingsDir.getAbsolutePath(), filename);

        try {
            newHandler.copySettings(newFile, (String)newHandler.getOrder()
                .getAttribute(CrawlOrder.ATTR_SETTINGS_DIRECTORY));
        } catch (IOException e3) {
            // Print stack trace to help debug the issue where we cannot
            // create a new job from an old one that has overrides.
            e3.printStackTrace();
            throw new FatalConfigurationException(
                    "IOException occurred while writing new settings files" +
                    " for new job/profile\n" + e3.getMessage());
        } catch (AttributeNotFoundException e) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while writing new" +
                    " settings files for new job/profile\n" + e.getMessage());
        } catch (MBeanException e) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while writing new settings files" +
                    " for new job/profile\n" + e.getMessage());
        } catch (ReflectionException e) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while writing new settings" +
                    " files for new job/profile\n" + e.getMessage());
        }
        CrawlerSettings orderfile = newHandler.getSettingsObject(null);

        orderfile.setName(name);
        orderfile.setDescription(description);

        if (seeds != null) {
            BufferedWriter writer = null;
            try {
                writer = new BufferedWriter(
                    new OutputStreamWriter(
                        new FileOutputStream(
                            newHandler.getPathRelativeToWorkingDirectory(seedfile)),
                        "UTF-8"));
                try {
                    writer.write(seeds);
                } finally {
                    writer.close();
                }
            } catch (IOException e) {
                throw new FatalConfigurationException(
                    "IOException occurred while writing the seed file for the"
                        + " new job/profile\n" + e.getMessage());
            }
        }
        return newHandler;
    }

    /**
     * @param recover
     *            Source to use recovering. Can be full path to a recovery log
     *            or full path to a checkpoint src dir.
     * @param sh
     *            Settings Handler to update.
     * @param jobName
     *            Name of this job.
     * @throws FatalConfigurationException
     */
    protected void updateRecoveryPaths(final File recover,
            final SettingsHandler sh, final String jobName)
    throws FatalConfigurationException {
        if (recover == null) {
            return;
        }
        checkDirectory(recover);
        try {
            // Set 'recover-path' to be the old job's recovery log path.
            updateRecoveryPaths(recover, sh);
        } catch (AttributeNotFoundException e1) {
            throw new FatalConfigurationException(
                    "AttributeNotFoundException occurred while setting up "
                            + "new job/profile " + jobName + " \n"
                            + e1.getMessage());
        } catch (InvalidAttributeValueException e1) {
            throw new FatalConfigurationException(
                    "InvalidAttributeValueException occurred while setting up "
                            + "new job/profile " + jobName + " \n"
                            + e1.getMessage());
        } catch (MBeanException e1) {
            throw new FatalConfigurationException(
                    "MBeanException occurred while setting up "
                            + "new job/profile " + jobName + " \n"
                            + e1.getMessage());
        } catch (ReflectionException e1) {
            throw new FatalConfigurationException(
                    "ReflectionException occurred while setting up "
                            + "new job/profile " + jobName + " \n"
                            + e1.getMessage());
        } catch (IOException e) {
            throw new FatalConfigurationException(
                    "IOException occurred while setting up new job/profile "
                            + jobName + " \n" + e.getMessage());
        }
    }

    /**
     * @param recover
     *            Source to use recovering. Can be full path to a recovery log
     *            or full path to a checkpoint src dir.
     * @param newHandler
     * @throws ReflectionException
     * @throws MBeanException
     * @throws InvalidAttributeValueException
     * @throws AttributeNotFoundException
     * @throws IOException
     */
    private void updateRecoveryPaths(final File recover,
        SettingsHandler newHandler)
    throws AttributeNotFoundException, InvalidAttributeValueException,
    MBeanException, ReflectionException, IOException {
        if (recover == null || !recover.exists()) {
            throw new IOException("Recovery src does not exist: " + recover);
        }
        newHandler.getOrder().setAttribute(
            new Attribute(CrawlOrder.ATTR_RECOVER_PATH,
                recover.getAbsolutePath()));

        // Now, ensure that 'logs' and 'state' don't overlap with the
        // previous job's files (it is ok for 'arcs' and 'scratch' to overlap).
        File newLogsDisk = null;
        final String RECOVERY_SUFFIX = "-R";
        while (true) {
            try {
                newLogsDisk = newHandler.getOrder().
                    getSettingsDir(CrawlOrder.ATTR_LOGS_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get logs directory", e);
            }
            if (newLogsDisk == null) {
                // Attribute lookup failed; avoid an NPE and give up here.
                break;
            }
            if (newLogsDisk.list().length > 0) {
                // 'new' directory is nonempty; rename with trailing '-R'.
                String logsPath = (String) newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_LOGS_PATH);
                if (logsPath.endsWith("/")) {
                    logsPath = logsPath.substring(0, logsPath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_LOGS_PATH,
                        logsPath + RECOVERY_SUFFIX));
            } else {
                // Directory is suitably empty; exit loop.
                break;
            }
        }
        File newStateDisk = null;
        while (true) {
            try {
                newStateDisk = newHandler.getOrder().getSettingsDir(
                        CrawlOrder.ATTR_STATE_PATH);
            } catch (AttributeNotFoundException e) {
                logger.log(Level.SEVERE, "Failed to get state directory", e);
            }
            if (newStateDisk == null) {
                // Attribute lookup failed; avoid an NPE and give up here.
                break;
            }
            if (newStateDisk.list().length > 0) {
                // 'new' directory is nonempty; rename with trailing '-R'.
                String statePath = (String) newHandler.getOrder().
                    getAttribute(CrawlOrder.ATTR_STATE_PATH);
                if (statePath.endsWith("/")) {
                    statePath = statePath.substring(0, statePath.length() - 1);
                }
                newHandler.getOrder().setAttribute(
                    new Attribute(CrawlOrder.ATTR_STATE_PATH,
                        statePath + RECOVERY_SUFFIX));
            } else {
                // Directory is suitably empty; exit loop.
                break;
            }
        }
    }

    /**
     * Discard the handler's 'new job'. This will remove any files/directories
     * written to disk.
     */
    public void discardNewJob() {
        if (newJob == null) {
            return;
        }
        FileUtils.deleteDir(new File(newJob.getSettingsDirectory()));
        // Clear the reference so we don't hand out a job whose files have
        // been deleted.
        newJob = null;
    }

    /**
     * Get the handler's 'new job'.
     * @return The handler's 'new job'.
     */
    public CrawlJob getNewJob() {
        return newJob;
    }

    /**
     * Is the crawler accepting crawl jobs to run?
     * @return True if the next available CrawlJob will be crawled. False
     *         otherwise.
     */
    public boolean isRunning() {
        return running;
    }

    /**
     * Is a crawl job being crawled?
     * @return True if a job is actually being crawled (even if it is paused).
     *         False if no job is being crawled.
     */
    public boolean isCrawling() {
        return this.currentJob != null;
    }

    /**
     * Allow jobs to be crawled.
     */
    public void startCrawler() {
        running = true;
        if (pendingCrawlJobs.size() > 0 && !isCrawling()) {
            // Ok, can just start the next job.
            startNextJob();
        }
    }

    /**
     * Stop future jobs from being crawled.
     *
     * This action will not affect the current job.
     */
    public void stopCrawler() {
        running = false;
    }

    /**
     * Start the next crawl job.
     *
     * If a job is already running this method will do nothing.
     */
    protected final void startNextJob() {
        synchronized (this) {
            if (startingNextJob != null) {
                try {
                    startingNextJob.join();
                } catch (InterruptedException e) {
                    logger.log(Level.WARNING,
                            "interrupt waiting for job start to complete", e);
                    return;
                }
            }
            startingNextJob = new Thread(new Runnable() {
                public void run() {
                    startNextJobInternal();
                }
            }, "StartNextJob");
            startingNextJob.start();
        }
    }

    protected void startNextJobInternal() {
        if (pendingCrawlJobs.size() == 0 || isCrawling()) {
            // No job ready or already crawling.
            return;
        }
        this.currentJob = pendingCrawlJobs.first();
        assert pendingCrawlJobs.contains(currentJob) :
            "pendingCrawlJobs is in an illegal state";
        pendingCrawlJobs.remove(currentJob);
        try {
            this.currentJob.setupForCrawlStart();
            // This is ugly but needed so I can clear the currentJob
            // reference in the crawlEnding and update the list of completed
            // jobs.  Also, crawlEnded can start up the next job.
            this.currentJob.getController().addCrawlStatusListener(this);
            // Now, actually start.
            this.currentJob.getController().requestCrawlStart();
        } catch (InitializationException e) {
            loadJob(getStateJobFile(this.currentJob.getDirectory()));
            this.currentJob = null;
            startNextJobInternal(); // Load the next job if there is one.
        }
    }

    /**
     * Forward a 'kick' update to the current job if any.
     */
    public void kickUpdate() {
        if (this.currentJob != null) {
            this.currentJob.kickUpdate();
        }
    }

    /**
     * Loads options from a file. Typically these are a list of available
     * modules that can be plugged into some part of the configuration.
     * For example Processors, Frontiers, Filters, etc. Leading and trailing
     * spaces are trimmed from each line.
     *
     * <p>Options are loaded from the CLASSPATH.
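     * <p>For instance, a hypothetical <code>Frontier.options</code> file
     * under <code>modules/</code> on the CLASSPATH could list one
     * implementation class per line (any line containing '#' is skipped
     * as a comment):
     * <pre>
     * # Candidate frontier implementations
     * org.archive.crawler.frontier.BdbFrontier
     * org.archive.crawler.frontier.AdaptiveRevisitFrontier
     * </pre>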
     * @param file the name of the option file (without path!)
     * @return The option file with each option line as a separate entry in the
     *         ArrayList.
     * @throws IOException when there is trouble reading the file.
     */
    public static ArrayList<String> loadOptions(String file)
    throws IOException {
        ArrayList<String> ret = new ArrayList<String>();
        Enumeration resources =
            CrawlJob.class.getClassLoader().getResources("modules/" + file);

        boolean noFileFound = true;
        while (resources.hasMoreElements()) {
            InputStream is = ((URL) resources.nextElement()).openStream();
            noFileFound = false;

            String line = null;
            BufferedReader bf =
                new BufferedReader(new InputStreamReader(is), 8192);
            try {
                while ((line = bf.readLine()) != null) {
                    line = line.trim();
                    if (line.indexOf('#') < 0 && line.length() > 0) {
                        // Not a comment and not blank; looks like a valid line.
                        ret.add(line);
                    }
                }
            } finally {
                bf.close();
            }
        }

        if (noFileFound) {
            throw new IOException("Failed to get " + file + " from the" +
                " CLASSPATH");
        }

        return ret;
    }

    /**
     * Returns a URIFrontierMarker for the current, paused, job. If there is no
     * current job or it is not paused, null will be returned.
     *
     * @param regexpr
     *            A regular expression that each URI must match in order to be
     *            considered 'within' the marker.
     * @param inCacheOnly
     *            Limit marker scope to 'cached' URIs.
     * @return a URIFrontierMarker for the current job.
     * @see #getPendingURIsList(FrontierMarker, int, boolean)
     * @see org.archive.crawler.framework.Frontier#getInitialMarker(String,
     *      boolean)
     * @see org.archive.crawler.framework.FrontierMarker
     */
    public FrontierMarker getInitialMarker(String regexpr,
            boolean inCacheOnly) {
        return (this.currentJob != null)?
                this.currentJob.getInitialMarker(regexpr, inCacheOnly): null;
    }

    /**
     * Returns the frontier's URI list based on the provided marker. This
     * method will return null if there is no current job or if the current
     * job is not paused. Only when there is a paused current job will this
     * method return a URI list.
     *
     * @param marker
     *            URIFrontier marker
     * @param numberOfMatches
     *            maximum number of matches to return
     * @param verbose
     *            should detailed info be provided on each URI?
     * @return the frontier's URI list based on the provided marker
     * @throws InvalidFrontierMarkerException
     *             When the marker is inconsistent with the current state of
     *             the frontier.
     * @see #getInitialMarker(String, boolean)
     * @see org.archive.crawler.framework.FrontierMarker
     */
    public ArrayList getPendingURIsList(FrontierMarker marker,
            int numberOfMatches, boolean verbose)
    throws InvalidFrontierMarkerException {
        return (this.currentJob != null)?
           this.currentJob.getPendingURIsList(marker, numberOfMatches, verbose):
           null;
    }

    /**
     * Delete any URI from the frontier of the current (paused) job that
     * matches the specified regular expression. If the current job is not
     * paused (or there is no current job) nothing will be done.
     * @param regexpr Regular expression to delete URIs by.
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String regexpr) {
        return deleteURIsFromPending(regexpr, null);
    }

    /**
     * Delete any URI from the frontier of the current (paused) job that
     * matches the specified regular expression. If the current job is not
     * paused (or there is no current job) nothing will be done.
     * @param uriPattern Regular expression to delete URIs by.
     * @param queuePattern Regular expression of target queues (or null for
     * all).
     * @return the number of URIs deleted
     */
    public long deleteURIsFromPending(String uriPattern, String queuePattern) {
        return (this.currentJob != null)?
                this.currentJob.deleteURIsFromPending(uriPattern, queuePattern): 0;
    }

    public String importUris(String file, String style, String force) {
        return importUris(file, style, "true".equals(force));
    }

    /**
     * @param fileOrUrl Name of file w/ seeds.
     * @param style What style of seeds -- crawl log (<code>crawlLog</code>
     * style) or recovery journal (<code>recoveryJournal</code> style), or
     * seeds file style (pass <code>default</code> style).
     * @param forceRevisit Should we revisit even if seen before?
     * @return A display string that has a count of all added.
     */
    public String importUris(final String fileOrUrl, final String style,
            final boolean forceRevisit) {
        return (this.currentJob != null)?
            this.currentJob.importUris(fileOrUrl, style, forceRevisit): null;
    }

    protected int importUris(InputStream is, String style,
            boolean forceRevisit) {
        return (this.currentJob != null)?
                this.currentJob.importUris(is, style, forceRevisit): 0;
    }

    /**
     * Schedule a uri.
     * @param uri Uri to schedule.
     * @param forceFetch Should it be force-fetched?
     * @param isSeed True if seed.
     * @throws URIException
     */
    public void importUri(final String uri, final boolean forceFetch,
            final boolean isSeed)
    throws URIException {
        importUri(uri, forceFetch, isSeed, true);
    }

    /**
     * Schedule a uri.
     * @param str String that can be: 1. a UURI, 2. a snippet of a
     * crawl.log line, or 3. a snippet from a recover log.  See
     * {@link #importUris(InputStream, String, boolean)} for how it subparses
     * the lines from crawl.log and recover.log.
     * @param forceFetch Should it be force-fetched?
     * @param isSeed True if seed.
     * @param isFlush If true, flush the frontier IF it implements
     * flushing.
     * @throws URIException
     */
    public void importUri(final String str, final boolean forceFetch,
            final boolean isSeed, final boolean isFlush)
    throws URIException {
        if (this.currentJob != null) {
            this.currentJob.importUri(str, forceFetch, isSeed, isFlush);
        }
    }

    /**
     * If it's a HostQueuesFrontier, it needs to be flushed for the queued
     * URIs.
     */
    protected void doFlush() {
        if (this.currentJob != null) {
            this.currentJob.flush();
        }
    }

    public void stop() {
        if (isCrawling()) {
            deleteJob(getCurrentJob().getUID());
        }
    }

    public void requestCrawlStop() {
        if (this.currentJob != null) {
            this.currentJob.stopCrawling();
        }
    }

    /**
     * Ensure the order file with the new name/description is written.
     * See '[ 1066573 ] sometimes job based-on other job uses older job name'.
     * @param newJob Newly created job.
     * @param metaname Metaname for the new job.
     * @param description Description for the new job.
     * @return <code>newJob</code>
     */
    public static CrawlJob ensureNewJobWritten(CrawlJob newJob, String metaname,
            String description) {
        XMLSettingsHandler settingsHandler = newJob.getSettingsHandler();
        CrawlerSettings orderfile = settingsHandler.getSettingsObject(null);
        orderfile.setName(metaname);
        orderfile.setDescription(description);
        settingsHandler.writeSettingsObject(orderfile);
        return newJob;
    }

    public void crawlStarted(String message) {
        // TODO Auto-generated method stub
    }

    public void crawlEnding(String sExitMessage) {
        loadJob(getStateJobFile(this.currentJob.getDirectory()));
        currentJob = null;
        synchronized (this) {
            // If the GUI terminated the job then it is waiting for this event.
            notifyAll();
        }
    }

    public void crawlEnded(String sExitMessage) {
        if (this.running) {
            startNextJob();
        }
    }

    public void crawlPausing(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlPaused(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlResuming(String statusMessage) {
        // TODO Auto-generated method stub
    }

    public void crawlCheckpoint(File checkpointDir) throws Exception {
        // TODO Auto-generated method stub
    }
}