Skip to content

Commit 6606edd

Browse files
committed
some improvements on non 7bit string handling #14
This should read at least some zip files with umlauts and stuff better
1 parent 74d7442 commit 6606edd

4 files changed

Lines changed: 89 additions & 2 deletions

File tree

src/Zip.php

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -295,7 +295,7 @@ public function addFile($file, $fileinfo = '')
295295
}
296296

297297
/**
298-
* Add a file to the current TAR archive using the given $data as content
298+
* Add a file to the current Zip archive using the given $data as content
299299
*
300300
* @param string|FileInfo $fileinfo either the name to use in the archive (string) or a FileInfo object with all meta data
301301
* @param string $data binary content of the file to add
@@ -495,8 +495,10 @@ protected function readCentralFileHeader()
495495

496496
if ($header['extra_len'] != 0) {
497497
$header['extra'] = fread($this->fh, $header['extra_len']);
498+
$header['extradata'] = $this->parseExtra($header['extra']);
498499
} else {
499500
$header['extra'] = '';
501+
$header['extradata'] = array();
500502
}
501503

502504
if ($header['comment_len'] != 0) {
@@ -536,8 +538,10 @@ protected function readFileHeader($header)
536538
$header['filename'] = fread($this->fh, $data['filename_len']);
537539
if ($data['extra_len'] != 0) {
538540
$header['extra'] = fread($this->fh, $data['extra_len']);
541+
$header['extradata'] = array_merge($header['extradata'], $this->parseExtra($header['extra']));
539542
} else {
540543
$header['extra'] = '';
544+
$header['extradata'] = array();
541545
}
542546

543547
$header['compression'] = $data['compression'];
@@ -559,6 +563,35 @@ protected function readFileHeader($header)
559563
return $header;
560564
}
561565

566+
/**
 * Parse the raw "extra field" data of a ZIP header into its individual fields
 *
 * The data is read as a sequence of records. Each record consists of a 2 byte
 * little endian ID, a 2 byte little endian length and that many bytes of payload.
 * Every payload is returned as a raw binary string under its numeric ID. When an
 * ID occurs more than once, the last record wins.
 *
 * Two IDs are decoded further and exposed under readable keys:
 *
 *  - 0x7075 (Info-ZIP Unicode Path) as 'utf8path'
 *  - 0x6375 (Info-ZIP Unicode Comment) as 'utf8comment'
 *
 * For both, the leading version byte and CRC (5 bytes in total) are stripped. The
 * readable key is only set when text remains after stripping. The CRC is not
 * verified.
 *
 * A trailing fragment that is too short to hold an ID and a length is ignored.
 *
 * @param string $header the raw extra field data as read from the archive
 * @return array raw payloads indexed by numeric field ID plus the optional
 *               'utf8path' and 'utf8comment' keys
 */
protected function parseExtra($header)
{
    $extra = array();
    $header = (string) $header;

    // parse all extra fields as raw values. A record needs at least its 4 byte
    // id/length prefix, so a shorter tail is dropped instead of being passed to
    // unpack(), which would warn and return false
    while (strlen($header) >= 4) {
        $set = unpack('vid/vlen', $header);

        // the casts cover PHP versions in which substr() returns false rather
        // than an empty string when nothing is left to return
        $extra[$set['id']] = (string) substr($header, 4, $set['len']);
        $header = (string) substr($header, 4 + $set['len']);
    }

    // handle known ones: each readable key is derived from its own field ID
    $known = array(
        0x7075 => 'utf8path',
        0x6375 => 'utf8comment',
    );
    foreach ($known as $id => $key) {
        // 1 byte version + 4 byte CRC precede the text, so 5 bytes or less
        // means there is no text to use
        if (isset($extra[$id]) && strlen($extra[$id]) > 5) {
            $extra[$key] = substr($extra[$id], 5); // strip version and crc
        }
    }

    return $extra;
}
594+
562595
/**
563596
* Create fileinfo object from header data
564597
*
@@ -568,15 +601,48 @@ protected function readFileHeader($header)
568601
protected function header2fileinfo($header)
{
    $fileinfo = new FileInfo();
    $fileinfo->setSize($header['size']);
    $fileinfo->setCompressedSize($header['compressed_size']);
    $fileinfo->setMtime($header['mtime']);
    // the entry counts as a directory when the external attributes equal one of
    // these two values
    $fileinfo->setIsdir($header['external'] == 0x41FF0010 || $header['external'] == 16);

    // NOTE(review): the name and comment are always treated as CP437 when no UTF-8
    // extra field exists. Archives that flag their names as UTF-8 in the general
    // purpose bits would be converted twice - confirm whether that flag is
    // available in $header and should be honoured here.

    // prefer the UTF-8 path from the extra fields, otherwise convert the raw name
    if (isset($header['extradata']['utf8path'])) {
        $fileinfo->setPath($header['extradata']['utf8path']);
    } else {
        $fileinfo->setPath($this->cp2utf8($header['filename']));
    }

    // same for the comment; it is set exactly once, with the final UTF-8 value
    if (isset($header['extradata']['utf8comment'])) {
        $fileinfo->setComment($header['extradata']['utf8comment']);
    } else {
        $fileinfo->setComment($this->cp2utf8($header['comment']));
    }

    return $fileinfo;
}
579624

625+
/**
 * Convert the given CP437 encoded string to UTF-8
 *
 * The conversion strategies are tried in this order:
 *
 *  1. iconv with the correct CP437 encoding
 *  2. mbstring with CP850, which is similar enough (CP437 seems not to be
 *     available in mbstring)
 *  3. the unchanged input, which is still better than nothing
 *
 * A strategy that is unavailable or does not return a string is skipped, so the
 * method always returns a string for string input.
 *
 * @param string $string the CP437 encoded bytes
 * @return string the UTF-8 encoded string, or the input if no conversion worked
 */
protected function cp2utf8($string)
{
    if (function_exists('iconv')) {
        // iconv() returns false when it cannot perform the conversion, e.g.
        // because the local implementation does not know CP437
        $converted = iconv('CP437', 'UTF-8', $string);
        if (is_string($converted)) {
            return $converted;
        }
    }

    if (function_exists('mb_convert_encoding')) {
        $converted = mb_convert_encoding($string, 'UTF-8', 'CP850');
        if (is_string($converted)) {
            return $converted;
        }
    }

    return $string;
}
645+
580646
/**
581647
* Write to the open filepointer or memory
582648
*

tests/zip.test.php

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,27 @@ public function test_excludeextract()
323323
self::rdelete($out);
324324
}
325325

326+
/**
 * An archive written by WinRAR must extract its umlaut file name as UTF-8
 */
public function test_umlautWinrar()
{
    $out = sys_get_temp_dir().'/dwziptest'.md5(time());

    $zip = new Zip();
    $zip->open(__DIR__ . '/zip/issue14-winrar.zip');
    $zip->extract($out);
    $this->assertFileExists("$out/tüst.txt");

    // remove the extracted files again so the temp dir is not littered and a
    // later test using the same directory name starts from a clean state
    self::rdelete($out);
}
335+
336+
/**
 * An archive written by the Windows zip tool must extract its umlaut file name as UTF-8
 */
public function test_umlautWindows()
{
    $out = sys_get_temp_dir().'/dwziptest'.md5(time());

    $zip = new Zip();
    $zip->open(__DIR__ . '/zip/issue14-windows.zip');
    $zip->extract($out);
    $this->assertFileExists("$out/täst.txt");

    // remove the extracted files again so the temp dir is not littered and a
    // later test using the same directory name starts from a clean state
    self::rdelete($out);
}
345+
346+
326347
/**
327348
* recursive rmdir()/unlink()
328349
*

tests/zip/issue14-windows.zip

114 Bytes
Binary file not shown.

tests/zip/issue14-winrar.zip

186 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)