Skip to content

Commit 5f464df

Browse files
author
David Pierson
committed
GeoffHart T9121 Multi-member gzip support
1 parent 4330703 commit 5f464df

2 files changed

Lines changed: 124 additions & 98 deletions

File tree

src/GZip/GzipInputStream.cs

Lines changed: 112 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@
3636
// obligated to do so. If you do not wish to do so, delete this
3737
// exception statement from your version.
3838

39+
// HISTORY
40+
// 11-08-2009 GeoffHart T9121 Added Multi-member gzip support
41+
42+
using System;
3943
using System.IO;
4044

4145
using ICSharpCode.SharpZipLib.Checksums;
@@ -78,14 +82,12 @@ public class GZipInputStream : InflaterInputStream
7882
/// <summary>
7983
/// CRC-32 value for uncompressed data
8084
/// </summary>
81-
protected Crc32 crc = new Crc32();
82-
83-
/// <summary>
84-
/// Indicates end of stream
85-
/// </summary>
86-
protected bool eos;
85+
protected Crc32 crc;
8786

88-
// Have we read the GZIP header yet?
87+
/// <summary>
88+
/// Flag to indicate if we've read the GZIP header yet for the current member (block of compressed data).
89+
/// This is tracked per-block as the file is parsed.
90+
/// </summary>
8991
bool readGZIPHeader;
9092
#endregion
9193

@@ -132,41 +134,58 @@ public GZipInputStream(Stream baseInputStream, int size)
132134
/// <returns>Returns the number of bytes actually read.</returns>
133135
public override int Read(byte[] buffer, int offset, int count)
134136
{
135-
// We first have to read the GZIP header, then we feed all the
136-
// rest of the data to the base class.
137-
//
138-
// As we do that we continually update the CRC32. Once the data is
139-
// finished, we check the CRC32
140-
//
141-
// This means we don't need our own buffer, as everything is done
142-
// in baseInputStream the superclass.
143-
if (!readGZIPHeader) {
144-
ReadHeader();
145-
}
146-
147-
if (eos) {
148-
return 0;
149-
}
150-
151-
// We don't have to read the header, so we just grab data from the superclass
152-
int bytesRead = base.Read(buffer, offset, count);
153-
if (bytesRead > 0) {
154-
crc.Update(buffer, offset, bytesRead);
155-
}
156-
157-
if (inf.IsFinished) {
158-
ReadFooter();
137+
// A GZIP file can contain multiple blocks of compressed data, although this is quite rare.
138+
// A compressed block could potentially be empty, so we need to loop until we reach EOF or
139+
// we find data.
140+
while (true) {
141+
142+
// If we haven't read the header for this block, read it
143+
if (! readGZIPHeader) {
144+
145+
// Try to read header. If there is no header (0 bytes available), this is EOF. If there is
146+
// an incomplete header, this will throw an exception.
147+
if (! ReadHeader()) {
148+
return 0;
149+
}
150+
}
151+
152+
// Try to read compressed data
153+
int bytesRead = base.Read(buffer, offset, count);
154+
if (bytesRead > 0) {
155+
crc.Update(buffer, offset, bytesRead);
156+
}
157+
158+
// If the inflater has finished the current member, read and verify that member's footer
159+
if (inf.IsFinished) {
160+
ReadFooter();
161+
}
162+
163+
if (bytesRead > 0) {
164+
return bytesRead;
165+
}
159166
}
160-
return bytesRead;
161167
}
162168
#endregion
163169

164170
#region Support routines
165-
void ReadHeader()
171+
bool ReadHeader()
166172
{
173+
// Initialize CRC for this block
174+
crc = new Crc32();
175+
176+
// Make sure there is data in the file. Reaching EOF here is fine, but ReadLeByte() throws an exception when it
177+
// finds no data, so we cannot rely on it to fill the buffer; we fill the buffer and check for EOF ourselves.
178+
if (inputBuffer.Available <= 0) {
179+
inputBuffer.Fill();
180+
if (inputBuffer.Available <= 0) {
181+
// No header, EOF.
182+
return false;
183+
}
184+
}
185+
167186
// 1. Check the two magic bytes
168187
Crc32 headCRC = new Crc32();
169-
int magic = baseInputStream.ReadByte();
188+
int magic = inputBuffer.ReadLeByte();
170189

171190
if (magic < 0) {
172191
throw new EndOfStreamException("EOS reading GZIP header");
@@ -176,188 +195,189 @@ void ReadHeader()
176195
if (magic != (GZipConstants.GZIP_MAGIC >> 8)) {
177196
throw new GZipException("Error GZIP header, first magic byte doesn't match");
178197
}
179-
180-
magic = baseInputStream.ReadByte();
198+
199+
//magic = baseInputStream.ReadByte();
200+
magic = inputBuffer.ReadLeByte();
181201

182202
if (magic < 0) {
183203
throw new EndOfStreamException("EOS reading GZIP header");
184204
}
185-
205+
186206
if (magic != (GZipConstants.GZIP_MAGIC & 0xFF)) {
187207
throw new GZipException("Error GZIP header, second magic byte doesn't match");
188208
}
189209

190210
headCRC.Update(magic);
191-
211+
192212
// 2. Check the compression type (must be 8)
193-
int compressionType = baseInputStream.ReadByte();
213+
int compressionType = inputBuffer.ReadLeByte();
194214

195215
if ( compressionType < 0 ) {
196216
throw new EndOfStreamException("EOS reading GZIP header");
197217
}
198-
218+
199219
if ( compressionType != 8 ) {
200220
throw new GZipException("Error GZIP header, data not in deflate format");
201221
}
202222
headCRC.Update(compressionType);
203-
223+
204224
// 3. Check the flags
205-
int flags = baseInputStream.ReadByte();
225+
int flags = inputBuffer.ReadLeByte();
206226
if (flags < 0) {
207227
throw new EndOfStreamException("EOS reading GZIP header");
208228
}
209229
headCRC.Update(flags);
210-
230+
211231
/* This flag byte is divided into individual bits as follows:
212-
213-
bit 0 FTEXT
214-
bit 1 FHCRC
215-
bit 2 FEXTRA
216-
bit 3 FNAME
217-
bit 4 FCOMMENT
218-
bit 5 reserved
219-
bit 6 reserved
220-
bit 7 reserved
232+
233+
bit 0 FTEXT
234+
bit 1 FHCRC
235+
bit 2 FEXTRA
236+
bit 3 FNAME
237+
bit 4 FCOMMENT
238+
bit 5 reserved
239+
bit 6 reserved
240+
bit 7 reserved
221241
*/
222-
242+
223243
// 3.1 Check the reserved bits are zero
224-
244+
225245
if ((flags & 0xE0) != 0) {
226246
throw new GZipException("Reserved flag bits in GZIP header != 0");
227247
}
228-
248+
229249
// 4.-6. Skip the modification time, extra flags, and OS type
230250
for (int i=0; i< 6; i++) {
231-
int readByte = baseInputStream.ReadByte();
251+
int readByte = inputBuffer.ReadLeByte();
232252
if (readByte < 0) {
233253
throw new EndOfStreamException("EOS reading GZIP header");
234254
}
235255
headCRC.Update(readByte);
236256
}
237-
257+
238258
// 7. Read extra field
239259
if ((flags & GZipConstants.FEXTRA) != 0) {
240260
// Skip subfield id
241261
for (int i=0; i< 2; i++) {
242-
int readByte = baseInputStream.ReadByte();
262+
int readByte = inputBuffer.ReadLeByte();
243263
if (readByte < 0) {
244264
throw new EndOfStreamException("EOS reading GZIP header");
245265
}
246266
headCRC.Update(readByte);
247267
}
248268

249-
if (baseInputStream.ReadByte() < 0 || baseInputStream.ReadByte() < 0) {
269+
if (inputBuffer.ReadLeByte() < 0 || inputBuffer.ReadLeByte() < 0) {
250270
throw new EndOfStreamException("EOS reading GZIP header");
251271
}
252-
272+
253273
int len1, len2;
254-
len1 = baseInputStream.ReadByte();
255-
len2 = baseInputStream.ReadByte();
274+
len1 = inputBuffer.ReadLeByte();
275+
len2 = inputBuffer.ReadLeByte();
256276
if ((len1 < 0) || (len2 < 0)) {
257277
throw new EndOfStreamException("EOS reading GZIP header");
258278
}
259279
headCRC.Update(len1);
260280
headCRC.Update(len2);
261-
281+
262282
int extraLen = (len1 << 8) | len2;
263283
for (int i = 0; i < extraLen;i++) {
264-
int readByte = baseInputStream.ReadByte();
284+
int readByte = inputBuffer.ReadLeByte();
265285
if (readByte < 0)
266286
{
267287
throw new EndOfStreamException("EOS reading GZIP header");
268288
}
269289
headCRC.Update(readByte);
270290
}
271291
}
272-
292+
273293
// 8. Read file name
274294
if ((flags & GZipConstants.FNAME) != 0) {
275295
int readByte;
276-
while ( (readByte = baseInputStream.ReadByte()) > 0) {
296+
while ( (readByte = inputBuffer.ReadLeByte()) > 0) {
277297
headCRC.Update(readByte);
278298
}
279-
299+
280300
if (readByte < 0) {
281301
throw new EndOfStreamException("EOS reading GZIP header");
282302
}
283303
headCRC.Update(readByte);
284304
}
285-
305+
286306
// 9. Read comment
287307
if ((flags & GZipConstants.FCOMMENT) != 0) {
288308
int readByte;
289-
while ( (readByte = baseInputStream.ReadByte()) > 0) {
309+
while ( (readByte = inputBuffer.ReadLeByte()) > 0) {
290310
headCRC.Update(readByte);
291311
}
292-
312+
293313
if (readByte < 0) {
294314
throw new EndOfStreamException("EOS reading GZIP header");
295315
}
296316

297317
headCRC.Update(readByte);
298318
}
299-
319+
300320
// 10. Read header CRC
301321
if ((flags & GZipConstants.FHCRC) != 0) {
302322
int tempByte;
303-
int crcval = baseInputStream.ReadByte();
323+
int crcval = inputBuffer.ReadLeByte();
304324
if (crcval < 0) {
305325
throw new EndOfStreamException("EOS reading GZIP header");
306326
}
307-
308-
tempByte = baseInputStream.ReadByte();
327+
328+
tempByte = inputBuffer.ReadLeByte();
309329
if (tempByte < 0) {
310330
throw new EndOfStreamException("EOS reading GZIP header");
311331
}
312-
332+
313333
crcval = (crcval << 8) | tempByte;
314334
if (crcval != ((int) headCRC.Value & 0xffff)) {
315335
throw new GZipException("Header CRC value mismatch");
316336
}
317337
}
318-
338+
319339
readGZIPHeader = true;
340+
return true;
320341
}
321342

322343
void ReadFooter()
323344
{
324345
byte[] footer = new byte[8];
325-
int avail = inf.RemainingInput;
326-
327-
if (avail > 8) {
328-
avail = 8;
329-
}
330-
331-
System.Array.Copy(inputBuffer.RawData, inputBuffer.RawLength - inf.RemainingInput, footer, 0, avail);
332-
int needed = 8 - avail;
333-
346+
347+
// End of the current member: record the output byte count, return inf's unconsumed input to inputBuffer, and reset the inflater
348+
long bytesRead = inf.TotalOut & 0xffffffff;
349+
inputBuffer.Available += inf.RemainingInput;
350+
inf.Reset();
351+
352+
// Read footer from inputBuffer
353+
int needed = 8;
334354
while (needed > 0) {
335-
int count = baseInputStream.Read(footer, 8 - needed, needed);
355+
int count = inputBuffer.ReadClearTextBuffer(footer, 8 - needed, needed);
336356
if (count <= 0) {
337357
throw new EndOfStreamException("EOS reading GZIP footer");
338358
}
339359
needed -= count; // Jewel Jan 16
340360
}
341361

362+
// Decode the little-endian CRC-32 stored in the footer and compare it with the CRC of the decompressed data
342363
int crcval = (footer[0] & 0xff) | ((footer[1] & 0xff) << 8) | ((footer[2] & 0xff) << 16) | (footer[3] << 24);
343364
if (crcval != (int) crc.Value) {
344365
throw new GZipException("GZIP crc sum mismatch, theirs \"" + crcval + "\" and ours \"" + (int) crc.Value);
345366
}
346-
367+
347368
// NOTE The total here is the original total modulo 2 ^ 32.
348369
uint total =
349370
(uint)((uint)footer[4] & 0xff) |
350371
(uint)(((uint)footer[5] & 0xff) << 8) |
351372
(uint)(((uint)footer[6] & 0xff) << 16) |
352373
(uint)((uint)footer[7] << 24);
353374

354-
if ((inf.TotalOut & 0xffffffff) != total) {
375+
if (bytesRead != total) {
355376
throw new GZipException("Number of bytes mismatch in footer");
356377
}
357-
358-
// Should we support multiple gzip members.
359-
// Difficult, since there may be some bytes still in baseInputStream dataBuffer
360-
eos = true;
378+
379+
// Mark header read as false so if another header exists, we'll continue reading through the file
380+
readGZIPHeader = false;
361381
}
362382
#endregion
363383
}

0 commit comments

Comments
 (0)