1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.crawler.frontier;
26
27 import org.archive.crawler.datamodel.CoreAttributeConstants;
28
29 /***
30 * Defines static constants for the Adaptive Revisiting module defining data
31 * keys in the CrawlURI AList.
32 *
33 * @author Kristinn Sigurdsson
34 *
35 * @see org.archive.crawler.datamodel.CoreAttributeConstants
36 */
37 public interface AdaptiveRevisitAttributeConstants
38 extends CoreAttributeConstants {
39
40 /*** Designates a field in the CrawlURIs AList for the content digest of
41 * an earlier visit. */
42 public static final String A_LAST_CONTENT_DIGEST = "last-content-digest";
43 public static final String A_TIME_OF_NEXT_PROCESSING =
44 "time-of-next-processing";
45 public static final String A_WAIT_INTERVAL = "wait-interval";
46 public static final String A_NUMBER_OF_VISITS = "number-of-visits";
47 public static final String A_NUMBER_OF_VERSIONS = "number-of-versions";
48 public static final String A_FETCH_OVERDUE = "fetch-overdue";
49
50 public static final String A_LAST_ETAG = "last-etag";
51 public static final String A_LAST_DATESTAMP = "last-datestamp";
52
53 public static final String A_WAIT_REEVALUATED = "wait-reevaluated";
54
55 /*** Mark a URI to be dropped from revisit handling. Used for custom
56 * processors that want to implement more selective revisiting.
57 * Actual effect depends on whether an alreadyIncluded structure
58 * is used. If an alreadyIncluded is used, dropping the URI from
59 * revisit handling means it won't be visited again. If an
60 * alreadyIncluded is not used, this merely drops one discovery of
61 * the URI, and it may be rediscovered and thus revisited that way.
62 */
63 public static final String A_DISCARD_REVISIT = "no-revisit";
64
65 /*** No knowledge of URI content. Possibly not fetched yet, unable
66 * to check if different or an error occurred on last fetch attempt. */
67 public static final int CONTENT_UNKNOWN = -1;
68
69 /*** URI content has not changed between the two latest, successfully
70 * completed fetches. */
71 public static final int CONTENT_UNCHANGED = 0;
72
73 /*** URI content had changed between the two latest, successfully completed
74 * fetches. By definition, content has changed if there has only been one
75 * successful fetch made. */
76 public static final int CONTENT_CHANGED = 1;
77
78 /***
79 * Key to use getting state of crawluri from the CrawlURI alist.
80 */
81 public static final String A_CONTENT_STATE_KEY = "ar-state";
82 }