View Javadoc

1   /* $Id: ArchiveRecordHeader.java 4547 2006-08-28 23:44:20Z stack-sf $
2    *
3    * Created on August 21st, 2006
4    *
5    * Copyright (C) 2006 Internet Archive.
6    *
7    * This file is part of the Heritrix web crawler (crawler.archive.org).
8    *
9    * Heritrix is free software; you can redistribute it and/or modify
10   * it under the terms of the GNU Lesser Public License as published by
11   * the Free Software Foundation; either version 2.1 of the License, or
12   * any later version.
13   *
14   * Heritrix is distributed in the hope that it will be useful,
15   * but WITHOUT ANY WARRANTY; without even the implied warranty of
16   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17   * GNU Lesser Public License for more details.
18   *
19   * You should have received a copy of the GNU Lesser Public License
20   * along with Heritrix; if not, write to the Free Software
21   * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22   */
23  package org.archive.io;
24  
25  import java.util.Map;
26  import java.util.Set;
27  
/**
 * Archive Record Header.
 *
 * <p>Read-only view of the header of a single record in an Archive file:
 * a handful of typed accessors for common fields plus generic access to
 * all header fields by key.
 *
 * @author stack
 * @version $Date: 2006-08-28 23:44:20 +0000 (Mon, 28 Aug 2006) $ $Version$
 */
public interface ArchiveRecordHeader {
    /**
     * Get the time when the record was created.
     *
     * @return Date in 14 digit time format (UTC).
     * @see org.archive.util.ArchiveUtils#parse14DigitDate(String)
     */
    public abstract String getDate();

    /**
     * Get the length of the record.
     *
     * @return Length of record.
     */
    public abstract long getLength();

    /**
     * Get the subject-url of the record.
     *
     * @return Record subject-url.
     */
    public abstract String getUrl();

    /**
     * Get the mimetype of the record.
     *
     * @return Record mimetype.
     */
    public abstract String getMimetype();

    /**
     * Get the version of the record.
     *
     * @return Record version.
     */
    public abstract String getVersion();

    /**
     * Get the position of the record within its Archive file.
     *
     * @return Offset into Archive file at which this record begins.
     */
    public abstract long getOffset();

    /**
     * Look up a single header field by key.
     *
     * @param key Key to use looking up field value.
     * @return Value for passed key, or null if no such entry.
     */
    public abstract Object getHeaderValue(final String key);

    /**
     * Get the keys of all header fields.
     *
     * @return Header field name keys.
     */
    public abstract Set getHeaderFieldKeys();

    /**
     * Get all header fields.
     *
     * @return Map of header fields.
     */
    public abstract Map getHeaderFields();

    /**
     * Get the identifier of the Archive file this record came from.
     *
     * @return Identifier for current Archive file.  Be aware this
     * may not be a file name or file path.  It may just be an URL.  Depends
     * on how Archive file was made.
     */
    public abstract String getReaderIdentifier();

    /**
     * Get the identifier of this record.
     *
     * @return Identifier for the record.  If ARC, the URL + date.  If WARC,
     * the GUID assigned.
     */
    public abstract String getRecordIdentifier();

    /**
     * Get the digest of this record.
     *
     * @return Digest as String for this record. Only available after
     * the record has been read in totality.
     */
    public abstract String getDigest();

    /**
     * Get the offset at which the content begins.
     * For ARCs, it is used to delimit where http headers end and content
     * begins.  For WARCs, it is the end of Named Fields before payload starts.
     *
     * @return Offset at which the content begins.
     */
    public abstract int getContentBegin();

    /**
     * Redeclared from {@link Object} so that a string rendering is an
     * explicit part of this interface.
     *
     * @return String representation of this header.
     */
    public abstract String toString();
}