Skip to content
15 changes: 15 additions & 0 deletions Lib/test/test_tomllib/test_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,3 +124,18 @@ def test_types_import(self):
never imported by tests.
"""
importlib.import_module(f"{tomllib.__name__}._types")

def test_parse_simple_number(self):
    # Exercise the regex-free integer fast path directly. It returns
    # (end position, value) for a plain decimal integer and None for
    # anything it does not handle.
    parse = tomllib._parser._parse_simple_number

    # Plain decimal integers, ending at end of input or at a newline.
    accepted = (
        ("123", (3, 123)),
        ("123\n", (3, 123)),
        ("0\n", (1, 0)),
    )
    for text, expected in accepted:
        self.assertEqual(parse(text, 0), expected)

    # Leading zero, date-like, time-like, float, underscore separator,
    # and inputs that do not start with a decimal digit.
    rejected = (
        "0123\n",
        "123-456\n",
        "123:456\n",
        "1.0\n",
        "1_000\n",
        "x123\n",
        "o123\n",
        "b100\n",
    )
    for text in rejected:
        self.assertIsNone(parse(text, 0))
61 changes: 52 additions & 9 deletions Lib/tomllib/_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,22 +6,26 @@

from types import MappingProxyType

from ._re import (
Comment thread
vstinner marked this conversation as resolved.
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)

TYPE_CHECKING = False
if TYPE_CHECKING:
from collections.abc import Iterable
from typing import IO, Any, Final

from ._types import Key, ParseFloat, Pos

_REGEX_IMPORTED = True
Comment thread
vstinner marked this conversation as resolved.
Outdated
from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)
else:
# Regular expressions are lazy imported to speed up startup time
_REGEX_IMPORTED = False
Comment thread
vstinner marked this conversation as resolved.
Outdated

ASCII_CTRL: Final = frozenset(chr(i) for i in range(32)) | frozenset(chr(127))

# Neither of these sets include quotation mark or backslash. They are
Expand All @@ -41,6 +45,7 @@
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")
# ASCII decimal digits only. Used by the regex-free integer fast path
# (_parse_simple_number and its guard in parse_value) to test characters
# by set membership.
_DECDIGIT_CHARS: Final = frozenset("0123456789")

BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
{
Expand Down Expand Up @@ -665,6 +670,25 @@ def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
pos += 1


def _parse_simple_number(
    src: str, pos: Pos
) -> None | tuple[Pos, int]:
    """Parse a plain decimal integer at *pos* without using regexes.

    This is a fast path used before the regular expressions have been
    imported. It only recognises a run of ASCII decimal digits that is
    followed by a newline or by the end of the document (trailing
    whitespace at the very end of the document is ignored).

    Returns ``(new_pos, value)`` where *new_pos* is the index just past
    the last digit. Returns ``None`` when the text at *pos* is not such
    an integer (no digits, a leading zero, or any other character after
    the digits), so that the caller can fall back to the regex-based
    parsers.
    """
    # Locate the end of the document ignoring trailing whitespace by
    # scanning backwards. Calling src.rstrip() here would copy the whole
    # document on every call whenever it ends with whitespace.
    end = len(src)
    while end > pos and src[end - 1].isspace():
        end -= 1

    start = pos
    while pos < end and src[pos] in _DECDIGIT_CHARS:
        pos += 1

    # No digit at all: not a number this fast path can handle.
    if pos == start:
        return None
    # The digits must run up to a newline or the end of the document;
    # anything else (".", "_", "-", ":", " ", ...) is left to the regexes.
    if pos < end and src[pos] != "\n":
        return None
    # Leading zeros are rejected, except for the single digit "0".
    if pos - start > 1 and src[start] == "0":
        return None
    return pos, int(src[start:pos])


def parse_value(
src: str, pos: Pos, parse_float: ParseFloat
) -> tuple[Pos, Any]:
Expand Down Expand Up @@ -703,6 +727,25 @@ def parse_value(
if char == "{":
return parse_inline_table(src, pos, parse_float)

global _REGEX_IMPORTED, RE_DATETIME, RE_LOCALTIME, RE_NUMBER
global match_to_datetime, match_to_localtime, match_to_number
if not _REGEX_IMPORTED:
# Simple number parser avoiding regex
if char in _DECDIGIT_CHARS:
res = _parse_simple_number(src, pos)
if res is not None:
return res

from ._re import (
RE_DATETIME,
RE_LOCALTIME,
RE_NUMBER,
match_to_datetime,
match_to_localtime,
match_to_number,
)
_REGEX_IMPORTED = True

# Dates and times
datetime_match = RE_DATETIME.match(src, pos)
if datetime_match:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Improve :mod:`tomllib` import time (up to 10x faster). Patch by Victor
Stinner.
Loading