3636// obligated to do so. If you do not wish to do so, delete this
3737// exception statement from your version.
3838
39+ // HISTORY
40+ // 11-08-2009 GeoffHart T9121 Added Multi-member gzip support
41+
42+ using System ;
3943using System . IO ;
4044
4145using ICSharpCode . SharpZipLib . Checksums ;
@@ -78,14 +82,12 @@ public class GZipInputStream : InflaterInputStream
/// <summary>
/// CRC-32 value for the uncompressed data of the member currently being read.
/// Initialised here so that derived classes never observe a null reference
/// before the first header has been read; ReadHeader() assigns a fresh
/// instance at the start of every member.
/// </summary>
protected Crc32 crc = new Crc32();

/// <summary>
/// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
/// This is tracked per-block as the file is parsed: ReadHeader() sets it and ReadFooter() clears it.
/// </summary>
bool readGZIPHeader;
9092 #endregion
9193
@@ -132,41 +134,58 @@ public GZipInputStream(Stream baseInputStream, int size)
132134 /// <returns>Returns the number of bytes actually read.</returns>
public override int Read(byte[] buffer, int offset, int count)
{
    // A GZIP file can contain multiple members (header + deflate data + footer),
    // although this is quite rare. A member may decompress to nothing at all, so
    // keep going until data is produced or the input is exhausted.
    while (true) {

        // Start of a new member: parse its header first. ReadHeader() returns
        // false when there are no more bytes at all, which is a clean EOF.
        // An incomplete header throws instead.
        if (!readGZIPHeader) {
            if (!ReadHeader()) {
                return 0;
            }
        }

        // Inflate into the caller's buffer and fold the output into the
        // running CRC of the current member.
        int bytesRead = base.Read(buffer, offset, count);
        if (bytesRead > 0) {
            crc.Update(buffer, offset, bytesRead);
        }

        // The inflater has consumed the whole deflate stream of this member:
        // validate the trailer. ReadFooter() clears readGZIPHeader so the next
        // iteration looks for a following member.
        if (inf.IsFinished) {
            ReadFooter();
        }

        // A zero-length request can never yield any bytes, so it must return
        // here; otherwise the loop would spin forever while the current member
        // still has data pending.
        if (bytesRead > 0 || count == 0) {
            return bytesRead;
        }
    }
}
162168 #endregion
163169
164170 #region Support routines
/// <summary>
/// Reads and validates the header of the next GZIP member (layout per RFC 1952)
/// and resets the per-member data CRC.
/// </summary>
/// <returns>
/// true when a header was read and validated; false when no bytes at all were
/// available, i.e. the input ended cleanly before another member.
/// </returns>
/// <exception cref="GZipException">
/// The magic bytes, compression method, reserved flag bits or header CRC are invalid.
/// </exception>
/// <exception cref="EndOfStreamException">
/// A header byte read reported end of stream.
/// </exception>
bool ReadHeader()
{
    // Initialize CRC for this block
    crc = new Crc32();

    // Make sure there is data in the file. Running out of input exactly at a
    // member boundary is a normal EOF, but ReadLeByte() throws when it cannot
    // find data, so probe the buffer ourselves before reading the first byte.
    if (inputBuffer.Available <= 0) {
        inputBuffer.Fill();
        if (inputBuffer.Available <= 0) {
            // No header, EOF.
            return false;
        }
    }

    // CRC over every header byte that precedes the optional CRC16 field.
    Crc32 headCRC = new Crc32();

    // 1. Check the two magic bytes
    // NOTE(review): the lines handling the first magic byte's CRC update were
    // elided in the reviewed diff; RFC 1952 requires all header bytes to be
    // covered by the header CRC, so it is included here - confirm against the file.
    int magic = ReadHeaderByte(headCRC);
    if (magic != (GZipConstants.GZIP_MAGIC >> 8)) {
        throw new GZipException("Error GZIP header, first magic byte doesn't match");
    }

    magic = ReadHeaderByte(headCRC);
    if (magic != (GZipConstants.GZIP_MAGIC & 0xFF)) {
        throw new GZipException("Error GZIP header, second magic byte doesn't match");
    }

    // 2. Check the compression type (must be 8)
    int compressionType = ReadHeaderByte(headCRC);
    if (compressionType != 8) {
        throw new GZipException("Error GZIP header, data not in deflate format");
    }

    // 3. Check the flags
    int flags = ReadHeaderByte(headCRC);

    /*  This flag byte is divided into individual bits as follows:

        bit 0   FTEXT
        bit 1   FHCRC
        bit 2   FEXTRA
        bit 3   FNAME
        bit 4   FCOMMENT
        bit 5   reserved
        bit 6   reserved
        bit 7   reserved
    */

    // 3.1 Check the reserved bits are zero
    if ((flags & 0xE0) != 0) {
        throw new GZipException("Reserved flag bits in GZIP header != 0");
    }

    // 4.-6. Skip the modification time (4 bytes), extra flags and OS type
    for (int i = 0; i < 6; i++) {
        ReadHeaderByte(headCRC);
    }

    // 7. Skip the extra field: a two byte little-endian length (XLEN)
    //    immediately followed by XLEN bytes of payload.
    if ((flags & GZipConstants.FEXTRA) != 0) {
        int extraLenLow = ReadHeaderByte(headCRC);
        int extraLenHigh = ReadHeaderByte(headCRC);
        int extraLen = (extraLenHigh << 8) | extraLenLow;

        for (int i = 0; i < extraLen; i++) {
            ReadHeaderByte(headCRC);
        }
    }

    // 8. Skip the zero-terminated file name. The terminator is read (and added
    //    to the header CRC) by the call that ends the loop.
    if ((flags & GZipConstants.FNAME) != 0) {
        while (ReadHeaderByte(headCRC) > 0) {
        }
    }

    // 9. Skip the zero-terminated comment, handled exactly like the file name.
    if ((flags & GZipConstants.FCOMMENT) != 0) {
        while (ReadHeaderByte(headCRC) > 0) {
        }
    }

    // 10. Check the header CRC: the low 16 bits of the CRC-32 of all preceding
    //     header bytes, stored least-significant byte first.
    if ((flags & GZipConstants.FHCRC) != 0) {
        int crcLow = ReadHeaderByte(null);
        int crcHigh = ReadHeaderByte(null);
        int crcval = (crcHigh << 8) | crcLow;

        if (crcval != ((int)headCRC.Value & 0xffff)) {
            throw new GZipException("Header CRC value mismatch");
        }
    }

    readGZIPHeader = true;
    return true;
}

/// <summary>
/// Reads a single header byte from the input buffer.
/// </summary>
/// <param name="headCRC">
/// Running header CRC to update with the byte, or null for bytes that are not
/// covered by it (the stored header CRC16 itself).
/// </param>
/// <returns>The byte that was read.</returns>
/// <exception cref="EndOfStreamException">The read reported end of stream.</exception>
int ReadHeaderByte(Crc32 headCRC)
{
    int value = inputBuffer.ReadLeByte();

    // Defensive: ReadLeByte() is expected to throw by itself when no data is
    // left, but a negative result is still treated as end of stream.
    if (value < 0) {
        throw new EndOfStreamException("EOS reading GZIP header");
    }

    if (headCRC != null) {
        headCRC.Update(value);
    }
    return value;
}
321342
/// <summary>
/// Reads the 8 byte trailer of the current GZIP member and checks it against
/// the data that was decompressed: a little-endian CRC-32 followed by the
/// little-endian uncompressed length modulo 2^32. Afterwards the inflater is
/// reset and the stream is marked as expecting a new member header.
/// </summary>
/// <exception cref="EndOfStreamException">The input ends before the full trailer.</exception>
/// <exception cref="GZipException">The stored CRC or length does not match.</exception>
void ReadFooter()
{
    // End of stream; remember the output byte count, hand the bytes the
    // inflater buffered but did not consume back to the input buffer, and
    // reset the inflater. The count is taken before Reset(), as the counter
    // is presumably cleared by it.
    long bytesRead = inf.TotalOut & 0xffffffff;
    inputBuffer.Available += inf.RemainingInput;
    inf.Reset();

    // Read footer from inputBuffer; a single call may deliver fewer bytes
    // than requested, so loop until all eight have arrived.
    byte[] footer = new byte[8];
    int needed = footer.Length;
    while (needed > 0) {
        int count = inputBuffer.ReadClearTextBuffer(footer, footer.Length - needed, needed);
        if (count <= 0) {
            throw new EndOfStreamException("EOS reading GZIP footer");
        }
        needed -= count;
    }

    // Bytes 0-3: CRC-32 of the uncompressed data, least-significant byte first.
    int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8) | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
    if (crcval != (int)crc.Value) {
        throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int)crc.Value + "\"");
    }

    // Bytes 4-7: uncompressed size.
    // NOTE The total here is the original total modulo 2 ^ 32.
    uint total =
        (uint)footer[4] |
        ((uint)footer[5] << 8) |
        ((uint)footer[6] << 16) |
        ((uint)footer[7] << 24);

    if (bytesRead != total) {
        throw new GZipException("Number of bytes mismatch in footer");
    }

    // Mark header read as false so if another header exists, we'll continue reading through the file
    readGZIPHeader = false;
}
362382 #endregion
363383 }
0 commit comments