Skip to content

Commit 032e512

Browse files
committed
Fix Windows file path parsing bug and add tests for parse_store_url
1 parent 2561669 commit 032e512

2 files changed

Lines changed: 105 additions & 1 deletion

File tree

src/zarr/storage/_utils.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
from __future__ import annotations
22

33
import re
4-
from pathlib import Path
4+
import sys
5+
from pathlib import Path, PureWindowsPath
56
from typing import TYPE_CHECKING, NamedTuple, TypeVar
67
from urllib.parse import urlparse
78

@@ -65,6 +66,12 @@ def parse_store_url(url: str) -> ParsedStoreUrl:
6566
>>> parse_store_url("/local/path")
6667
ParsedStoreUrl(scheme='', name=None, path='/local/path', raw='/local/path')
6768
"""
69+
# On Windows, bare paths like "C:\foo" or "C:/foo" cause urlparse to
70+
# misinterpret the drive letter as a URL scheme. Detect this early and
71+
# return a local-path result without going through urlparse.
72+
if sys.platform == "win32" and PureWindowsPath(url).drive:
73+
return ParsedStoreUrl(scheme="", name=None, path=url, raw=url)
74+
6875
parsed = urlparse(url)
6976

7077
# netloc is the "host" part (store name for memory://, bucket for s3://, etc.)

tests/test_store/test_utils.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
from __future__ import annotations
2+
3+
import sys
4+
from unittest.mock import patch
5+
6+
import pytest
7+
8+
from zarr.storage._utils import ParsedStoreUrl, parse_store_url
9+
10+
11+
class TestParseStoreUrl:
12+
"""Tests for parse_store_url."""
13+
14+
def test_memory_url(self) -> None:
15+
result = parse_store_url("memory://mystore")
16+
assert result == ParsedStoreUrl(
17+
scheme="memory", name="mystore", path="", raw="memory://mystore"
18+
)
19+
20+
def test_memory_url_with_path(self) -> None:
21+
result = parse_store_url("memory://mystore/path/to/data")
22+
assert result == ParsedStoreUrl(
23+
scheme="memory",
24+
name="mystore",
25+
path="path/to/data",
26+
raw="memory://mystore/path/to/data",
27+
)
28+
29+
def test_memory_url_no_name(self) -> None:
30+
result = parse_store_url("memory://")
31+
assert result.scheme == "memory"
32+
assert result.name is None
33+
34+
def test_s3_url(self) -> None:
35+
result = parse_store_url("s3://bucket/key")
36+
assert result == ParsedStoreUrl(
37+
scheme="s3", name="bucket", path="key", raw="s3://bucket/key"
38+
)
39+
40+
def test_file_url(self) -> None:
41+
result = parse_store_url("file:///tmp/test")
42+
assert result.scheme == "file"
43+
44+
def test_local_absolute_path(self) -> None:
45+
result = parse_store_url("/local/path")
46+
assert result == ParsedStoreUrl(scheme="", name=None, path="/local/path", raw="/local/path")
47+
48+
def test_local_relative_path(self) -> None:
49+
result = parse_store_url("relative/path")
50+
assert result == ParsedStoreUrl(
51+
scheme="", name=None, path="relative/path", raw="relative/path"
52+
)
53+
54+
@pytest.mark.parametrize(
55+
"url",
56+
[
57+
"C:\\Users\\foo",
58+
"C:/Users/foo",
59+
"D:/data/zarr",
60+
"c:/test",
61+
],
62+
)
63+
def test_windows_drive_letter(self, url: str) -> None:
64+
"""On Windows, bare drive-letter paths must be treated as local paths."""
65+
with patch.object(sys, "platform", "win32"):
66+
result = parse_store_url(url)
67+
assert result.scheme == ""
68+
assert result.name is None
69+
assert result.path == url
70+
assert result.raw == url
71+
72+
@pytest.mark.parametrize(
73+
"url",
74+
[
75+
"file:///C:/Users/foo",
76+
"file://C:/Users/foo",
77+
],
78+
)
79+
def test_file_url_with_drive_letter_on_windows(self, url: str) -> None:
80+
"""file:// URLs with drive letters are not treated as bare paths."""
81+
with patch.object(sys, "platform", "win32"):
82+
result = parse_store_url(url)
83+
assert result.scheme == "file"
84+
85+
@pytest.mark.parametrize(
86+
"url",
87+
[
88+
"C:\\Users\\foo",
89+
"C:/Users/foo",
90+
],
91+
)
92+
def test_drive_letter_not_special_on_non_windows(self, url: str) -> None:
93+
"""On non-Windows platforms, drive-letter paths go through urlparse."""
94+
with patch.object(sys, "platform", "linux"):
95+
result = parse_store_url(url)
96+
# urlparse interprets the drive letter as a scheme and lowercases it
97+
assert result.scheme == "c"

0 commit comments

Comments
 (0)