View Javadoc

1   /* FrontierJournal
2    * 
3    * Created on Oct 26, 2004
4    *
5    * Copyright (C) 2004 Internet Archive.
6    * 
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    * 
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   * 
14   * Heritrix is distributed in the hope that it will be useful, 
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   * 
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.crawler.frontier;
24  
25  import java.io.File;
26  import java.io.IOException;
27  
28  import org.archive.crawler.datamodel.CandidateURI;
29  
30  /***
31   * Record of key Frontier happenings.
32   * @author stack
33   * @version $Date: 2008-07-11 22:14:18 +0000 (Fri, 11 Jul 2008) $, $Revision: 5870 $
34   */
35  public interface FrontierJournal {
36      public static final String LOGNAME_RECOVER = "recover.gz";
37  
38      /***
39       * @param curi CrawlURI that has been scheduled to be added to the
40       * Frontier.
41       */
42      public abstract void added(CandidateURI curi);
43  
44      /***
45       * @param curi CrawlURI that finished successfully.
46       */
47      public abstract void finishedSuccess(CandidateURI curi);
48  
49      /***
50       * Note that a CrawlURI was emitted for processing.
51       * If not followed by a finished or rescheduled notation in
52       * the journal, the CrawlURI was still in-process when the journal ended.
53       * 
54       * @param curi CrawlURI emitted.
55       */
56      public abstract void emitted(CandidateURI curi);
57  
58      
59      /***
60       * @param curi CrawlURI finished unsuccessfully.
61       */
62      public abstract void finishedFailure(CandidateURI curi);
63  
64      /***
65       * @param curi CrawlURI finished disregarded (uncounted failure).
66       */
67      public abstract void finishedDisregard(CandidateURI curi);
68      
69      /***
70       * @param curi CrawlURI that was returned to the Frontier for 
71       * another try.
72       */
73      public abstract void rescheduled(CandidateURI curi);
74  
75      /***
76       *  Flush and close any held objects.
77       */
78      public abstract void close();
79      
80      /***
81       * Checkpoint.
82       * @param checkpointDir Directory we're checkpointing into.
83       * @throws IOException
84       */
85      public abstract void checkpoint(final File checkpointDir)
86      throws IOException;
87  
88      /***
89       * Add a line noting a serious crawl error. 
90       * 
91       * @param string
92       */
93      public abstract void seriousError(String string);
94  }