1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 package org.archive.io;
26
27 import java.io.EOFException;
28 import java.io.IOException;
29 import java.io.InputStream;
30 import java.util.zip.CRC32;
31 import java.util.zip.Deflater;
32 import java.util.zip.GZIPInputStream;
33
34 /***
35 * Read in the GZIP header.
36 *
37 * See RFC1952 for specification on what the header looks like.
38 * Assumption is that stream is cued-up with the gzip header as the
39 * next thing to be read.
40 *
41 * <p>Of <a href="http://jguru.com/faq/view.jsp?EID=13647">Java
42 * and unsigned bytes</a>. That is, its always a signed int in
43 * java no matter what the qualifier whether byte, char, etc.
44 *
45 * <p>Add accessors for optional filename, comment and MTIME.
46 *
47 * @author stack
48 */
49 public class GzipHeader {
50 /***
51 * Length of minimal GZIP header.
52 *
53 * See RFC1952 for explaination of value of 10.
54 */
55 public static final int MINIMAL_GZIP_HEADER_LENGTH = 10;
56
57 /***
58 * Total length of the gzip header.
59 */
60 protected int length = 0;
61
62 /***
63 * The GZIP header FLG byte.
64 */
65 protected int flg;
66
67 /***
68 * GZIP header XFL byte.
69 */
70 private int xfl;
71
72 /***
73 * GZIP header OS byte.
74 */
75 private int os;
76
77 /***
78 * Extra header field content.
79 */
80 private byte [] fextra = null;
81
82 /***
83 * GZIP header MTIME field.
84 */
85 private int mtime;
86
87
88 /***
89 * Shutdown constructor.
90 *
91 * Must pass an input stream.
92 */
93 public GzipHeader() {
94 super();
95 }
96
97 /***
98 * Constructor.
99 *
100 * This constructor advances the stream past any gzip header found.
101 *
102 * @param in InputStream to read from.
103 * @throws IOException
104 */
105 public GzipHeader(InputStream in) throws IOException {
106 super();
107 readHeader(in);
108 }
109
110 /***
111 * Read in gzip header.
112 *
113 * Advances the stream past the gzip header.
114 * @param in InputStream.
115 *
116 * @throws IOException Throws if does not start with GZIP Header.
117 */
118 public void readHeader(InputStream in) throws IOException {
119 CRC32 crc = new CRC32();
120 crc.reset();
121 if (!testGzipMagic(in, crc)) {
122 throw new NoGzipMagicException();
123 }
124 this.length += 2;
125 if (readByte(in, crc) != Deflater.DEFLATED) {
126 throw new IOException("Unknown compression");
127 }
128 this.length++;
129
130
131 this.flg = readByte(in, crc);
132 this.length++;
133
134
135 this.mtime = readInt(in, crc);
136 this.length += 4;
137
138
139 this.xfl = readByte(in, crc);
140 this.length++;
141 this.os = readByte(in, crc);
142 this.length++;
143
144
145 final int FLG_FEXTRA = 4;
146 if ((this.flg & FLG_FEXTRA) == FLG_FEXTRA) {
147 int count = readShort(in, crc);
148 this.length +=2;
149 this.fextra = new byte[count];
150 readByte(in, crc, this.fextra, 0, count);
151 this.length += count;
152 }
153
154
155 final int FLG_FNAME = 8;
156 if ((this.flg & FLG_FNAME) == FLG_FNAME) {
157 while (readByte(in, crc) != 0) {
158 this.length++;
159 }
160 }
161
162
163 final int FLG_FCOMMENT = 16;
164 if ((this.flg & FLG_FCOMMENT) == FLG_FCOMMENT) {
165 while (readByte(in, crc) != 0) {
166 this.length++;
167 }
168 }
169
170
171 final int FLG_FHCRC = 2;
172 if ((this.flg & FLG_FHCRC) == FLG_FHCRC) {
173 int calcCrc = (int)(crc.getValue() & 0xffff);
174 if (readShort(in, crc) != calcCrc) {
175 throw new IOException("Bad header CRC");
176 }
177 this.length += 2;
178 }
179 }
180
181 /***
182 * Test gzip magic is next in the stream.
183 * Reads two bytes. Caller needs to manage resetting stream.
184 * @param in InputStream to read.
185 * @return true if found gzip magic. False otherwise
186 * or an IOException (including EOFException).
187 * @throws IOException
188 */
189 public boolean testGzipMagic(InputStream in) throws IOException {
190 return testGzipMagic(in, null);
191 }
192
193 /***
194 * Test gzip magic is next in the stream.
195 * Reads two bytes. Caller needs to manage resetting stream.
196 * @param in InputStream to read.
197 * @param crc CRC to update.
198 * @return true if found gzip magic. False otherwise
199 * or an IOException (including EOFException).
200 * @throws IOException
201 */
202 public boolean testGzipMagic(InputStream in, CRC32 crc)
203 throws IOException {
204 return readShort(in, crc) == GZIPInputStream.GZIP_MAGIC;
205 }
206
207 /***
208 * Read an int.
209 *
210 * We do not expect to get a -1 reading. If we do, we throw exception.
211 * Update the crc as we go.
212 *
213 * @param in InputStream to read.
214 * @param crc CRC to update.
215 * @return int read.
216 *
217 * @throws IOException
218 */
219 private int readInt(InputStream in, CRC32 crc) throws IOException {
220 int s = readShort(in, crc);
221 return ((readShort(in, crc) << 16) & 0xffff0000) | s;
222 }
223
224 /***
225 * Read a short.
226 *
227 * We do not expect to get a -1 reading. If we do, we throw exception.
228 * Update the crc as we go.
229 *
230 * @param in InputStream to read.
231 * @param crc CRC to update.
232 * @return Short read.
233 *
234 * @throws IOException
235 */
236 private int readShort(InputStream in, CRC32 crc) throws IOException {
237 int b = readByte(in, crc);
238 return ((readByte(in, crc) << 8) & 0x00ff00) | b;
239 }
240
241 /***
242 * Read a byte.
243 *
244 * We do not expect to get a -1 reading. If we do, we throw exception.
245 * Update the crc as we go.
246 *
247 * @param in InputStream to read.
248 * @return Byte read.
249 *
250 * @throws IOException
251 */
252 protected int readByte(InputStream in) throws IOException {
253 return readByte(in, null);
254 }
255
256 /***
257 * Read a byte.
258 *
259 * We do not expect to get a -1 reading. If we do, we throw exception.
260 * Update the crc as we go.
261 *
262 * @param in InputStream to read.
263 * @param crc CRC to update.
264 * @return Byte read.
265 *
266 * @throws IOException
267 */
268 protected int readByte(InputStream in, CRC32 crc) throws IOException {
269 int b = in.read();
270 if (b == -1) {
271 throw new EOFException();
272 }
273 if (crc != null) {
274 crc.update(b);
275 }
276 return b & 0xff;
277 }
278
279 /***
280 * Read a byte.
281 *
282 * We do not expect to get a -1 reading. If we do, we throw exception.
283 * Update the crc as we go.
284 *
285 * @param in InputStream to read.
286 * @param crc CRC to update.
287 * @param buffer Buffer to read into.
288 * @param offset Offset to start filling buffer at.
289 * @param length How much to read.
290 * @return Bytes read.
291 *
292 * @throws IOException
293 */
294 protected int readByte(InputStream in, CRC32 crc, byte [] buffer,
295 int offset, int length)
296 throws IOException {
297 for (int i = offset; i < length; i++) {
298 buffer[offset + i] = (byte)readByte(in, crc);
299 }
300 return length;
301 }
302
303 /***
304 * @return Returns the fextra.
305 */
306 public byte[] getFextra() {
307 return this.fextra;
308 }
309
310 /***
311 * @return Returns the flg.
312 */
313 public int getFlg() {
314 return this.flg;
315 }
316
317 /***
318 * @return Returns the os.
319 */
320 public int getOs() {
321 return this.os;
322 }
323
324 /***
325 * @return Returns the xfl.
326 */
327 public int getXfl() {
328 return this.xfl;
329 }
330
331 /***
332 * @return Returns the mtime.
333 */
334 public int getMtime() {
335 return this.mtime;
336 }
337
338 /***
339 * @return Returns the length.
340 */
341 public int getLength() {
342 return length;
343 }
344 }