Skip to content

Commit a72a6e5

Browse files
committed
properly encode UTF-8 filenames in zip files #14
Uses the Info-ZIP Unicode Path Extra Field 0x7075
1 parent 6606edd commit a72a6e5

2 files changed

Lines changed: 91 additions & 10 deletions

File tree

src/Zip.php

Lines changed: 61 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,7 @@ public function create($file = '')
272272
* Add a file to the current archive using an existing file in the filesystem
273273
*
274274
* @param string $file path to the original file
275-
* @param string|FileInfo $fileinfo either the name to us in archive (string) or a FileInfo oject with all meta data, empty to take from original
275+
* @param string|FileInfo $fileinfo either the name to use in archive (string) or a FileInfo object with all meta data, empty to take from original
276276
* @throws ArchiveIOException
277277
*/
278278
public function addFile($file, $fileinfo = '')
@@ -610,13 +610,13 @@ protected function header2fileinfo($header)
610610
if(isset($header['extradata']['utf8path'])) {
611611
$fileinfo->setPath($header['extradata']['utf8path']);
612612
} else {
613-
$fileinfo->setPath($this->cp2utf8($header['filename']));
613+
$fileinfo->setPath($this->cpToUtf8($header['filename']));
614614
}
615615

616616
if(isset($header['extradata']['utf8comment'])) {
617617
$fileinfo->setComment($header['extradata']['utf8comment']);
618618
} else {
619-
$fileinfo->setComment($this->cp2utf8($header['comment']));
619+
$fileinfo->setComment($this->cpToUtf8($header['comment']));
620620
}
621621

622622
return $fileinfo;
@@ -632,7 +632,7 @@ protected function header2fileinfo($header)
632632
* @param $string
633633
* @return string
634634
*/
635-
protected function cp2utf8($string)
635+
protected function cpToUtf8($string)
636636
{
637637
if (function_exists('iconv')) {
638638
return iconv('CP437', 'UTF-8', $string);
@@ -643,6 +643,26 @@ protected function cp2utf8($string)
643643
}
644644
}
645645

/**
 * Convert the given UTF-8 encoded string to CP437
 *
 * Same caveats as for cpToUtf8() apply: iconv is tried first with CP437 as the
 * target, the mbstring fallback targets CP850 instead.
 *
 * iconv() returns false when the input contains a character it cannot convert
 * to the target charset. That result is never handed back to the caller;
 * the next conversion method is tried instead and, as a last resort, the input
 * is returned unchanged. The return value is therefore always a string.
 *
 * @param string $string UTF-8 encoded input
 * @return string the converted string, or the unchanged input if no conversion was possible
 */
protected function utf8ToCp($string)
{
    if (function_exists('iconv')) {
        // iconv raises a notice for unconvertible characters; failure is handled
        // through the return value, so the notice is suppressed on purpose
        $converted = @iconv('UTF-8', 'CP437', $string);
        if ($converted !== false) {
            return $converted;
        }
    }

    if (function_exists('mb_convert_encoding')) {
        $converted = mb_convert_encoding($string, 'CP850', 'UTF-8');
        if ($converted !== false) {
            return $converted;
        }
    }

    return $string;
}
664+
665+
646666
/**
647667
* Write to the open filepointer or memory
648668
*
@@ -750,6 +770,8 @@ protected function makeCentralFileRecord($offset, $ts, $crc, $len, $clen, $name,
750770
$comp = $comp ? 8 : 0;
751771
$dtime = dechex($this->makeDosTime($ts));
752772

773+
list($name, $extra) = $this->encodeFilename($name);
774+
753775
$header = "\x50\x4b\x01\x02"; // central file header signature
754776
$header .= pack('v', 14); // version made by - VFAT
755777
$header .= pack('v', 20); // version needed to extract - 2.0
@@ -766,13 +788,14 @@ protected function makeCentralFileRecord($offset, $ts, $crc, $len, $clen, $name,
766788
$header .= pack('V', $clen); // compressed size
767789
$header .= pack('V', $len); // uncompressed size
768790
$header .= pack('v', strlen($name)); // file name length
769-
$header .= pack('v', 0); // extra field length
791+
$header .= pack('v', strlen($extra)); // extra field length
770792
$header .= pack('v', 0); // file comment length
771793
$header .= pack('v', 0); // disk number start
772794
$header .= pack('v', 0); // internal file attributes
773795
$header .= pack('V', 0); // external file attributes @todo was 0x32!?
774796
$header .= pack('V', $offset); // relative offset of local header
775797
$header .= $name; // file name
798+
$header .= $extra; // extra (utf-8 filename)
776799

777800
return $header;
778801
}
@@ -794,6 +817,8 @@ protected function makeLocalFileHeader($ts, $crc, $len, $clen, $name, $comp = nu
794817
$comp = $comp ? 8 : 0;
795818
$dtime = dechex($this->makeDosTime($ts));
796819

820+
list($name, $extra) = $this->encodeFilename($name);
821+
797822
$header = "\x50\x4b\x03\x04"; // local file header signature
798823
$header .= pack('v', 20); // version needed to extract - 2.0
799824
$header .= pack('v', 0); // general purpose flag - no flags set
@@ -809,8 +834,37 @@ protected function makeLocalFileHeader($ts, $crc, $len, $clen, $name, $comp = nu
809834
$header .= pack('V', $clen); // compressed size
810835
$header .= pack('V', $len); // uncompressed size
811836
$header .= pack('v', strlen($name)); // file name length
812-
$header .= pack('v', 0); // extra field length
813-
$header .= $name;
837+
$header .= pack('v', strlen($extra)); // extra field length
838+
$header .= $name; // file name
839+
$header .= $extra; // extra (utf-8 filename)
814840
return $header;
815841
}
842+
/**
 * Returns an allowed filename and an extra field header
 *
 * When encoding stuff outside the 7bit ASCII range it needs to be placed in a separate
 * extra field. If converting the name with utf8ToCp() leaves it unchanged, the name is
 * returned as is together with an empty extra field.
 *
 * Otherwise an Info-ZIP Unicode Path extra field (tag 0x7075) is built. It consists of
 * the tag, the data length, a version byte (1), a CRC32 and the original UTF-8 name.
 * The CRC32 is calculated over the name that is returned for the header's file name
 * field, so readers can check that the extra field belongs to that name.
 *
 * @param string $original the UTF-8 encoded file name
 * @return array($filename, $extra)
 */
protected function encodeFilename($original)
{
    $cp437 = $this->utf8ToCp($original);
    if ($cp437 === $original) {
        return array($original, '');
    }

    if (!is_string($cp437)) {
        // the conversion failed: keep the raw bytes as the header name instead of
        // writing an empty name, the extra field below still carries the UTF-8 name
        $cp437 = $original;
    }

    $extra = pack(
        'vvCV',
        0x7075, // tag
        strlen($original) + 5, // length of file + version + crc
        1, // version
        crc32($cp437) // crc of the header's file name field
    );
    $extra .= $original;

    return array($cp437, $extra);
}
816870
}

tests/zip.test.php

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,9 @@ public function test_createfile()
6969

7070
$zip->create($tmp);
7171
$zip->setCompression(0);
72-
$zip->AddFile("$dir/testdata1.txt", "$dir/testdata1.txt", 0);
73-
$zip->AddFile("$dir/foobar/testdata2.txt", 'noway/testdata2.txt', 0);
74-
$zip->addData('another/testdata3.txt', 'testcontent3', 0, 0);
72+
$zip->addFile("$dir/testdata1.txt", "$dir/testdata1.txt");
73+
$zip->addFile("$dir/foobar/testdata2.txt", 'noway/testdata2.txt');
74+
$zip->addData('another/testdata3.txt', 'testcontent3');
7575
$zip->close();
7676

7777
$this->assertTrue(filesize($tmp) > 30); //arbitrary non-zero number
@@ -94,6 +94,9 @@ public function test_createfile()
9494

9595
$this->assertTrue(strpos($data, "foobar") === false, 'Path not in ZIP');
9696

97+
$this->nativeCheck($tmp);
98+
$this->native7ZipCheck($tmp);
99+
97100
@unlink($tmp);
98101
}
99102

@@ -152,6 +155,30 @@ public function test_dogfood()
152155
unlink($archive);
153156
}
154157

/**
 * Round trip an archive whose only entry has a non-ASCII UTF-8 name
 *
 * The archive is written, opened again and extracted with this library; the
 * extracted file has to exist under its UTF-8 name. The archive is then handed
 * to nativeCheck() and native7ZipCheck() before the temporary files are removed.
 */
public function test_utf8()
{
    $tmpDir = sys_get_temp_dir();
    $zipPath = $tmpDir . '/dwziptest' . md5(time()) . '.zip';
    $targetDir = $tmpDir . '/dwziptest' . md5(time() + 1);

    // write an archive with a single UTF-8 named entry
    $writer = new Zip();
    $writer->create($zipPath);
    $writer->addData('tüst.txt', 'test');
    $writer->close();
    $this->assertFileExists($zipPath);

    // read it back and unpack it
    $reader = new Zip();
    $reader->open($zipPath);
    $reader->extract($targetDir);

    $this->assertFileExists($targetDir . '/tüst.txt');

    $this->nativeCheck($zipPath);
    $this->native7ZipCheck($zipPath);

    // clean up
    self::rdelete($targetDir);
    unlink($zipPath);
}
180+
181+
155182
/**
156183
* Test the given archive with a native zip installation (if available)
157184
*

0 commit comments

Comments
 (0)