Skip to content

Commit aa00974

Browse files
VasekPurchart authored and dg committed
Strings: throw exception on malformed UTF-8 in webalize() and toAscii() (#205)
1 parent 108730b commit aa00974

File tree

3 files changed: +7 -1 lines changed

src/Utils/Strings.php

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,7 @@ public static function toAscii(string $s): string
142142
$transliterator = \Transliterator::create('Any-Latin; Latin-ASCII');
143143
}
144144

145-
$s = preg_replace('#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s);
145+
$s = self::pcre('preg_replace', ['#[^\x09\x0A\x0D\x20-\x7E\xA0-\x{2FF}\x{370}-\x{10FFFF}]#u', '', $s]);
146146
$s = strtr($s, '`\'"^~?', "\x01\x02\x03\x04\x05\x06");
147147
$s = str_replace(
148148
["\u{201E}", "\u{201C}", "\u{201D}", "\u{201A}", "\u{2018}", "\u{2019}", "\u{B0}"],

tests/Utils/Strings.toAscii().phpt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,9 @@ Assert::same('', Strings::toAscii("\u{10000}")); // U+10000
2121
Assert::same('', Strings::toAscii("\u{A4}")); // non-ASCII char
2222
Assert::same('a b', Strings::toAscii("a\u{A0}b")); // non-breaking space
2323
Assert::same('Tarikh', Strings::toAscii("Ta\u{2BE}rikh")); // Taʾrikh
24+
Assert::exception(function () {
25+
Strings::toAscii("0123456789\xFF");
26+
}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR);
2427

2528
if (class_exists('Transliterator') && \Transliterator::create('Any-Latin; Latin-ASCII')) {
2629
Assert::same('Athena->Moskva', Strings::toAscii("\u{391}\u{3B8}\u{3AE}\u{3BD}\u{3B1}\u{2192}\u{41C}\u{43E}\u{441}\u{43A}\u{432}\u{430}")); // Αθήνα→Москва

tests/Utils/Strings.webalize().phpt

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,3 +17,6 @@ Assert::same('zlutoucky-kun-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}
1717
Assert::same('ZLUTOUCKY-KUN-oooo', Strings::webalize("&\u{17D}LU\u{164}OU\u{10C}K\u{DD} K\u{16E}\u{147} \u{F6}\u{151}\u{F4}o!", null, false)); // &ŽLUŤOUČKÝ KŮŇ öőôo!
1818
Assert::same('1-4-!', Strings::webalize("\u{BC} !", '!'));
1919
Assert::same('a-b', Strings::webalize("a\u{A0}b")); // non-breaking space
20+
Assert::exception(function () {
21+
Strings::toAscii("0123456789\xFF");
22+
}, Nette\Utils\RegexpException::class, null, PREG_BAD_UTF8_ERROR);

0 commit comments

Comments
 (0)