View Javadoc

1   /* BlockInputStream
2   *
3   * Created on September 12, 2006
4   *
5   * Copyright (C) 2006 Internet Archive.
6   *
7   * This file is part of the Heritrix web crawler (crawler.archive.org).
8   *
9   * Heritrix is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU Lesser Public License as published by
11  * the Free Software Foundation; either version 2.1 of the License, or
12  * any later version.
13  *
14  * Heritrix is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17  * GNU Lesser Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser Public License
20  * along with Heritrix; if not, write to the Free Software
21  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22  */
23  package org.archive.util.ms;
24  
25  import java.io.IOException;
26  
27  import org.archive.io.SeekInputStream;
28  import static org.archive.util.ms.BlockFileSystem.BLOCK_SIZE;
29  
30  
31  /***
32   * InputStream for a file contained in a BlockFileSystem.
33   */
34  public class BlockInputStream extends SeekInputStream {
35  
36  
37      /***
38       * The starting block number.
39       */
40      private int start;
41      
42      
43      /***
44       * The current block.
45       */
46      private int block;
47      
48      
49      /***
50       * The BlockFileSystem that produced this stream.
51       */
52      private BlockFileSystem bfs;
53      
54      
55      /***
56       * The raw input stream of the BlockFileSystem.
57       */
58      private SeekInputStream raw;
59      
60      
61      /***
62       * The current logical position of this stream.
63       */
64      private long position;
65      
66      
67      /***
68       * The current file pointer position of the raw input stream.
69       */
70      private long expectedRawPosition;
71  
72      
73      /***
74       * The number of bytes read in the current block.
75       */
76      private int blockBytesRead;
77  
78  
79      /***
80       * Constructor.
81       * 
82       * @param bfs    The block file system that owns this stream
83       * @param block  The starting block number.
84       */
85      public BlockInputStream(BlockFileSystem bfs, int block) throws IOException {
86          this.raw = bfs.getRawInput();
87          this.bfs = bfs;
88          this.start = block;
89          this.block = block;
90          this.position = 0;
91          seek(block, 0);
92      }
93      
94      
95      
96      private void seek(long block, long rem) throws IOException {
97          assert rem < BLOCK_SIZE;
98          long pos = (block + 1) * BLOCK_SIZE + rem;
99          blockBytesRead = (int)rem;
100         expectedRawPosition = pos;
101         raw.position(pos);
102     }
103     
104     
105     private void ensureRawPosition() throws IOException {
106         if (raw.position() != expectedRawPosition) {
107             raw.position(expectedRawPosition);
108         }
109     }
110     
111     private boolean ensureBuffer() throws IOException {
112         if (block < 0) {
113             return false;
114         }
115         ensureRawPosition();
116         if (blockBytesRead < BLOCK_SIZE) {
117             return true;
118         }
119         block = bfs.getNextBlock(block);
120         if (block < 0) {
121             return false;
122         }
123         seek(block, 0);
124         return true;
125     }
126 
127     
128     public long skip(long v) throws IOException {
129         // FIXME
130         int r = read();
131         return (r < 0) ? 0 : 1;
132     }
133 
134     public int read() throws IOException {
135         if (!ensureBuffer()) {
136             return -1;
137         }
138         int r = raw.read();
139         position++;
140         expectedRawPosition++;
141         blockBytesRead++;
142         return r;
143     }
144     
145     
146     public int read(byte[] b, int ofs, int len) throws IOException {
147         if (!ensureBuffer()) {
148             return 0;
149         }
150         int rem = BLOCK_SIZE - (int)(position % BLOCK_SIZE);
151         len = Math.min(len, rem);
152         int c = raw.read(b, ofs, len);
153         position += c;
154         expectedRawPosition += c;
155         blockBytesRead++;
156         return len;
157     }
158 
159 
160     public int read(byte[] b) throws IOException {
161         return read(b, 0, b.length);
162     }
163 
164 
165     public long position() {
166         return position;
167     }
168 
169 
170     public void position(long v) throws IOException {
171         ensureRawPosition();
172         if (v == position) {
173             return;
174         }
175         
176         // If new position is in same block, just seek.
177         if (v / BLOCK_SIZE == position / BLOCK_SIZE) {
178             long rem = v % BLOCK_SIZE;
179             seek(block, rem);
180             position = v;
181             return;
182         }
183         
184         if (v > position) {
185             seekAfter(v);
186         } else {
187             seekBefore(v);
188         }
189     }
190 
191     
192     private void seekAfter(long v) throws IOException {
193         long currentBlock = position / BLOCK_SIZE;
194         long destBlock = v / BLOCK_SIZE;
195         long blockAdvance = destBlock - currentBlock;
196         for (int i = 0; i < blockAdvance; i++) {
197             block = bfs.getNextBlock(block);
198         }
199         seek(block, v % BLOCK_SIZE);
200         position = v;
201     }
202 
203     
204     private void seekBefore(long v) throws IOException {
205         long blockAdvance = (v - 1) / BLOCK_SIZE;
206         block = start;
207         for (int i = 0; i < blockAdvance; i++) {
208             block = bfs.getNextBlock(block);
209         }
210         seek(block, v % BLOCK_SIZE);
211     }
212 }