1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.io.arc;
26
27 import java.io.File;
28 import java.io.IOException;
29 import java.util.Iterator;
30 import java.util.Map;
31 import java.util.Set;
32
33 import org.archive.io.ArchiveRecordHeader;
34
35
36 /***
37 * An immutable class to hold an ARC record meta data.
38 *
39 * @author stack
40 */
41 public class ARCRecordMetaData implements ArchiveRecordHeader, ARCConstants {
42 /***
43 * Map of record header fields.
44 *
45 * We store all in a hashmap. This way we can hold version 1 or
46 * version 2 record meta data.
47 *
48 * <p>Keys are lowercase.
49 */
50 protected Map headerFields = null;
51
52 /***
53 * Digest for the record.
54 *
55 * Only available after the record has been read in totality.
56 */
57 private String digest = null;
58
59 /***
60 * Status for this request.
61 *
62 * There may be no status.
63 */
64 private String statusCode = null;
65
66 /***
67 * The arc this metadata came out.
68 * Descriptive String, either path or URL.
69 */
70 private String arc = null;
71
72 private int contentBegin = 0;
73
74 /***
75 * Shut down the default constructor.
76 */
77 protected ARCRecordMetaData() {
78 super();
79 }
80
81 /***
82 * Constructor.
83 *
84 * @param arc The arc file this metadata came out of.
85 * @param headerFields Hash of meta fields.
86 *
87 * @throws IOException
88 */
89 public ARCRecordMetaData(final String arc, Map headerFields)
90 throws IOException {
91
92 for (Iterator i = REQUIRED_VERSION_1_HEADER_FIELDS.iterator();
93 i.hasNext(); ) {
94 testRequiredField(headerFields, (String)i.next());
95 }
96 this.headerFields = headerFields;
97 this.arc = arc;
98 }
99
100 /***
101 * Test required field is present in hash.
102 *
103 * @param fields Map of fields.
104 * @param requiredField Field to test for.
105 *
106 * @exception IOException If required field is not present.
107 */
108 protected void testRequiredField(Map fields, String requiredField)
109 throws IOException {
110 if (!fields.containsKey(requiredField)) {
111 throw new IOException("Required field " + requiredField +
112 " not in meta data.");
113 }
114 }
115
116 /***
117 * Get the time when the record was harvested.
118 * <p>
119 * Returns the date in Heritrix 14 digit time format (UTC). See the
120 * {@link org.archive.util.ArchiveUtils} class for converting to Java
121 * dates.
122 *
123 * @return Header date in Heritrix 14 digit format.
124 * @see org.archive.util.ArchiveUtils#parse14DigitDate(String)
125 */
126 public String getDate() {
127 return (String) this.headerFields.get(DATE_FIELD_KEY);
128 }
129
130 /***
131 * @return Return length of the record.
132 */
133 public long getLength() {
134 return Long.parseLong((String)this.headerFields.
135 get(LENGTH_FIELD_KEY));
136 }
137
138 /***
139 * @return Header url.
140 */
141 public String getUrl() {
142 return (String)this.headerFields.get(URL_FIELD_KEY);
143 }
144
145 /***
146 * @return IP.
147 */
148 public String getIp()
149 {
150 return (String)this.headerFields.get(IP_HEADER_FIELD_KEY);
151 }
152
153 /***
154 * @return mimetype The mimetype that is in the ARC metaline -- NOT the http
155 * content-type content.
156 */
157 public String getMimetype() {
158 return (String)this.headerFields.get(MIMETYPE_FIELD_KEY);
159 }
160
161 /***
162 * @return Arcfile version.
163 */
164 public String getVersion() {
165 return (String)this.headerFields.get(VERSION_FIELD_KEY);
166 }
167
168 /***
169 * @return Offset into arcfile at which this record begins.
170 */
171 public long getOffset() {
172 return ((Long)this.headerFields.get(ABSOLUTE_OFFSET_KEY)).longValue();
173 }
174
175 /***
176 * @param key Key to use looking up field value.
177 * @return value for passed key of null if no such entry.
178 */
179 public Object getHeaderValue(String key) {
180 return this.headerFields.get(key);
181 }
182
183 /***
184 * @return Header field name keys.
185 */
186 public Set getHeaderFieldKeys()
187 {
188 return this.headerFields.keySet();
189 }
190
191 /***
192 * @return Map of header fields.
193 */
194 public Map getHeaderFields() {
195 return this.headerFields;
196 }
197
198 /***
199 * @return Returns identifier for ARC.
200 */
201 public String getArc() {
202 return this.arc;
203 }
204
205 /***
206 * @return Convenience method that does a
207 * return new File(this.arc) (Be aware this.arc is not always
208 * full path to an ARC file -- may be an URL). Test
209 * returned file for existence.
210 */
211 public File getArcFile() {
212 return new File(this.arc);
213 }
214
215 /***
216 * @return Returns the digest.
217 */
218 public String getDigest() {
219 return this.digest;
220 }
221
222 /***
223 * @param d The digest to set.
224 */
225 public void setDigest(String d) {
226 this.digest = d;
227 }
228
229 /***
230 * @return Returns the statusCode. May be null.
231 */
232 public String getStatusCode() {
233 return this.statusCode;
234 }
235
236 /***
237 * @param statusCode The statusCode to set.
238 */
239 public void setStatusCode(String statusCode) {
240 this.statusCode = statusCode;
241 }
242
243 public String toString() {
244 return ((this.arc != null)? this.arc: "") +
245 ": " +
246 ((this.headerFields != null)? this.headerFields.toString(): "");
247 }
248
249 public String getReaderIdentifier() {
250 return this.getArc();
251 }
252
253 public String getRecordIdentifier() {
254 return getDate() + "/" + getUrl();
255 }
256
257 public int getContentBegin() {
258 return this.contentBegin;
259 }
260
261 void setContentBegin(final int offset) {
262 this.contentBegin = offset;
263 }
264 }