Skip to content

Commit 967f5d1

Browse files
authored
Allow specifying an explicit format in parse_date/parse_time (#1131)
* Allow specifying an explicit format in parse_date/parse_time
* Improve docstring
1 parent 2d34ef5 commit 967f5d1

2 files changed

Lines changed: 49 additions & 10 deletions

File tree

babel/dates.py

Lines changed: 37 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1196,13 +1196,20 @@ class ParseError(ValueError):
11961196
def parse_date(
11971197
string: str,
11981198
locale: Locale | str | None = LC_TIME,
1199-
format: _PredefinedTimeFormat = 'medium',
1199+
format: _PredefinedTimeFormat | str = 'medium',
12001200
) -> datetime.date:
12011201
"""Parse a date from a string.
12021202
1203-
This function first tries to interpret the string as ISO-8601
1204-
date format, then uses the date format for the locale as a hint to
1205-
determine the order in which the date fields appear in the string.
1203+
If an explicit format is provided, it is used to parse the date.
1204+
1205+
>>> parse_date('01.04.2004', format='dd.MM.yyyy')
1206+
datetime.date(2004, 4, 1)
1207+
1208+
If no format is given, or if it is one of "full", "long", "medium",
1209+
or "short", the function first tries to interpret the string as
1210+
ISO-8601 date format and then uses the date format for the locale
1211+
as a hint to determine the order in which the date fields appear in
1212+
the string.
12061213
12071214
>>> parse_date('4/1/04', locale='en_US')
12081215
datetime.date(2004, 4, 1)
@@ -1212,26 +1219,35 @@ def parse_date(
12121219
datetime.date(2004, 4, 1)
12131220
>>> parse_date('2004-04-01', locale='de_DE')
12141221
datetime.date(2004, 4, 1)
1222+
>>> parse_date('01.04.04', locale='de_DE', format='short')
1223+
datetime.date(2004, 4, 1)
12151224
12161225
:param string: the string containing the date
12171226
:param locale: a `Locale` object or a locale identifier
1218-
:param format: the format to use (see ``get_date_format``)
1227+
:param format: the format to use, either an explicit date format,
1228+
or one of "full", "long", "medium", or "short"
1229+
(see ``get_date_format``)
12191230
"""
12201231
numbers = re.findall(r'(\d+)', string)
12211232
if not numbers:
12221233
raise ParseError("No numbers were found in input")
12231234

1235+
use_predefined_format = format in ('full', 'long', 'medium', 'short')
12241236
# we try ISO-8601 format first, meaning similar to formats
12251237
# extended YYYY-MM-DD or basic YYYYMMDD
12261238
iso_alike = re.match(r'^(\d{4})-?([01]\d)-?([0-3]\d)$',
12271239
string, flags=re.ASCII) # allow only ASCII digits
1228-
if iso_alike:
1240+
if iso_alike and use_predefined_format:
12291241
try:
12301242
return datetime.date(*map(int, iso_alike.groups()))
12311243
except ValueError:
12321244
pass # a locale format might fit better, so let's continue
12331245

1234-
format_str = get_date_format(format=format, locale=locale).pattern.lower()
1246+
if use_predefined_format:
1247+
fmt = get_date_format(format=format, locale=locale)
1248+
else:
1249+
fmt = parse_pattern(format)
1250+
format_str = fmt.pattern.lower()
12351251
year_idx = format_str.index('y')
12361252
month_idx = format_str.find('m')
12371253
if month_idx < 0:
@@ -1256,19 +1272,26 @@ def parse_date(
12561272
def parse_time(
12571273
string: str,
12581274
locale: Locale | str | None = LC_TIME,
1259-
format: _PredefinedTimeFormat = 'medium',
1275+
format: _PredefinedTimeFormat | str = 'medium',
12601276
) -> datetime.time:
12611277
"""Parse a time from a string.
12621278
12631279
This function uses the time format for the locale as a hint to determine
12641280
the order in which the time fields appear in the string.
12651281
1282+
If an explicit format is provided, the function will use it to parse
1283+
the time instead.
1284+
12661285
>>> parse_time('15:30:00', locale='en_US')
12671286
datetime.time(15, 30)
1287+
>>> parse_time('15:30:00', format='H:mm:ss')
1288+
datetime.time(15, 30)
12681289
12691290
:param string: the string containing the time
12701291
:param locale: a `Locale` object or a locale identifier
1271-
:param format: the format to use (see ``get_time_format``)
1292+
:param format: the format to use, either an explicit time format,
1293+
or one of "full", "long", "medium", or "short"
1294+
(see ``get_time_format``)
12721295
:return: the parsed time
12731296
:rtype: `time`
12741297
"""
@@ -1277,7 +1300,11 @@ def parse_time(
12771300
raise ParseError("No numbers were found in input")
12781301

12791302
# TODO: try ISO format first?
1280-
format_str = get_time_format(format=format, locale=locale).pattern.lower()
1303+
if format in ('full', 'long', 'medium', 'short'):
1304+
fmt = get_time_format(format=format, locale=locale)
1305+
else:
1306+
fmt = parse_pattern(format)
1307+
format_str = fmt.pattern.lower()
12811308
hour_idx = format_str.find('h')
12821309
if hour_idx < 0:
12831310
hour_idx = format_str.index('k')

tests/test_dates.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -656,6 +656,13 @@ def test_parse_date():
656656
assert dates.parse_date('2004-04-01', locale='sv_SE', format='short') == date(2004, 4, 1)
657657

658658

659+
def test_parse_date_custom_format():
660+
assert dates.parse_date('1.4.2024', format='dd.mm.yyyy') == date(2024, 4, 1)
661+
assert dates.parse_date('2024.4.1', format='yyyy.mm.dd') == date(2024, 4, 1)
662+
# Dates that look like ISO 8601 should use the custom format as well:
663+
assert dates.parse_date('2024-04-01', format='yyyy.dd.mm') == date(2024, 1, 4)
664+
665+
659666
@pytest.mark.parametrize('input, expected', [
660667
# base case, fully qualified time
661668
('15:30:00', time(15, 30)),
@@ -705,6 +712,11 @@ def get_date_format(*args, **kwargs):
705712
assert dates.parse_date('2024-10-20') == date(2024, 10, 20)
706713

707714

715+
def test_parse_time_custom_format():
716+
assert dates.parse_time('15:30:00', format='HH:mm:ss') == time(15, 30)
717+
assert dates.parse_time('00:30:15', format='ss:mm:HH') == time(15, 30)
718+
719+
708720
@pytest.mark.parametrize('case', ['', 'a', 'aaa'])
709721
@pytest.mark.parametrize('func', [dates.parse_date, dates.parse_time])
710722
def test_parse_errors(case, func):

0 commit comments

Comments
 (0)