|
| 1 | +""" |
| 2 | +Tests for UTF-8 path handling fix (Issue #370). |
| 3 | +
|
| 4 | +Verifies that the driver correctly handles paths containing non-ASCII |
| 5 | +characters on Windows (e.g., usernames like 'Thalén', folders like 'café'). |
| 6 | +
|
| 7 | +Bug Summary: |
| 8 | +- GetModuleDirectory() used ANSI APIs (PathRemoveFileSpecA) which corrupted UTF-8 paths |
| 9 | +- LoadDriverLibrary() used broken UTF-8→UTF-16 conversion: std::wstring(path.begin(), path.end()) |
| 10 | +- LoadDriverOrThrowException() used same broken pattern for mssql-auth.dll |
| 11 | +
|
| 12 | +Fix: |
| 13 | +- Use std::filesystem::path which handles encoding correctly on all platforms |
| 14 | +- fs::path::c_str() returns wchar_t* on Windows with proper UTF-16 encoding |
| 15 | +""" |
| 16 | + |
| 17 | +import pytest |
| 18 | +import platform |
| 19 | +import sys |
| 20 | +import subprocess |
| 21 | + |
| 22 | +import mssql_python |
| 23 | +from mssql_python import ddbc_bindings |
| 24 | + |
| 25 | + |
| 26 | +class TestPathHandlingCodePaths: |
| 27 | + """ |
| 28 | + Test that path handling code paths are exercised correctly. |
| 29 | +
|
| 30 | + These tests run by DEFAULT and verify the fixed C++ functions |
| 31 | + (GetModuleDirectory, LoadDriverLibrary) are working. |
| 32 | + """ |
| 33 | + |
| 34 | + def test_module_import_exercises_path_handling(self): |
| 35 | + """ |
| 36 | + Verify module import succeeds - this exercises GetModuleDirectory(). |
| 37 | +
|
| 38 | + When mssql_python imports, it calls: |
| 39 | + 1. GetModuleDirectory() - to find module location |
| 40 | + 2. LoadDriverLibrary() - to load ODBC driver |
| 41 | + 3. LoadLibraryW() for mssql-auth.dll on Windows |
| 42 | +
|
| 43 | + If any of these fail due to path encoding issues, import fails. |
| 44 | + """ |
| 45 | + assert mssql_python is not None |
| 46 | + assert hasattr(mssql_python, "__file__") |
| 47 | + assert isinstance(mssql_python.__file__, str) |
| 48 | + |
| 49 | + def test_module_path_is_valid_utf8(self): |
| 50 | + """Verify module path is valid UTF-8 string.""" |
| 51 | + module_path = mssql_python.__file__ |
| 52 | + |
| 53 | + # Should be encodable/decodable as UTF-8 without errors |
| 54 | + encoded = module_path.encode("utf-8") |
| 55 | + decoded = encoded.decode("utf-8") |
| 56 | + assert decoded == module_path |
| 57 | + |
| 58 | + def test_connect_function_available(self): |
| 59 | + """Verify connect function is available (proves ddbc_bindings loaded).""" |
| 60 | + assert hasattr(mssql_python, "connect") |
| 61 | + assert callable(mssql_python.connect) |
| 62 | + |
| 63 | + def test_ddbc_bindings_loaded(self): |
| 64 | + """Verify ddbc_bindings C++ module loaded successfully.""" |
| 65 | + assert ddbc_bindings is not None |
| 66 | + |
| 67 | + def test_connection_class_available(self): |
| 68 | + """Verify Connection class from C++ bindings is accessible.""" |
| 69 | + assert ddbc_bindings.Connection is not None |
| 70 | + |
| 71 | + |
| 72 | +class TestPathWithNonAsciiCharacters: |
| 73 | + """ |
| 74 | + Test path handling with non-ASCII characters in strings. |
| 75 | +
|
| 76 | + These tests verify that Python string operations with non-ASCII |
| 77 | + characters work correctly (prerequisite for the C++ fix to work). |
| 78 | + """ |
| 79 | + |
| 80 | + # Non-ASCII test strings representing real-world scenarios |
| 81 | + NON_ASCII_PATHS = [ |
| 82 | + "Thalén", # Swedish - the original issue reporter's username |
| 83 | + "café", # French |
| 84 | + "日本語", # Japanese |
| 85 | + "中文", # Chinese |
| 86 | + "über", # German |
| 87 | + "Müller", # German umlaut |
| 88 | + "España", # Spanish |
| 89 | + "Россия", # Russian |
| 90 | + "한국어", # Korean |
| 91 | + "Ñoño", # Spanish ñ |
| 92 | + "Ångström", # Swedish å |
| 93 | + ] |
| 94 | + |
| 95 | + @pytest.mark.parametrize("non_ascii_name", NON_ASCII_PATHS) |
| 96 | + def test_path_string_with_non_ascii(self, non_ascii_name): |
| 97 | + """Test that Python can handle paths with non-ASCII characters.""" |
| 98 | + # Simulate Windows-style path |
| 99 | + test_path = f"C:\\Users\\{non_ascii_name}\\project\\.venv\\Lib\\site-packages" |
| 100 | + |
| 101 | + # Verify UTF-8 encoding/decoding works |
| 102 | + encoded = test_path.encode("utf-8") |
| 103 | + decoded = encoded.decode("utf-8") |
| 104 | + assert decoded == test_path |
| 105 | + assert non_ascii_name in decoded |
| 106 | + |
| 107 | + @pytest.mark.parametrize("non_ascii_name", NON_ASCII_PATHS) |
| 108 | + def test_pathlib_with_non_ascii(self, non_ascii_name, tmp_path): |
| 109 | + """Test that pathlib handles non-ASCII directory names.""" |
| 110 | + from pathlib import Path |
| 111 | + |
| 112 | + test_dir = tmp_path / non_ascii_name |
| 113 | + test_dir.mkdir() |
| 114 | + assert test_dir.exists() |
| 115 | + |
| 116 | + # Create a file in the non-ASCII directory |
| 117 | + test_file = test_dir / "test.txt" |
| 118 | + test_file.write_text("test content", encoding="utf-8") |
| 119 | + assert test_file.exists() |
| 120 | + |
| 121 | + # Read back |
| 122 | + content = test_file.read_text(encoding="utf-8") |
| 123 | + assert content == "test content" |
| 124 | + |
| 125 | + def test_path_with_multiple_non_ascii_segments(self, tmp_path): |
| 126 | + """Test path with multiple non-ASCII directory segments.""" |
| 127 | + from pathlib import Path |
| 128 | + |
| 129 | + # Create nested directories with non-ASCII names |
| 130 | + nested = tmp_path / "Thalén" / "プロジェクト" / "código" |
| 131 | + nested.mkdir(parents=True) |
| 132 | + assert nested.exists() |
| 133 | + |
| 134 | + def test_path_with_spaces_and_non_ascii(self, tmp_path): |
| 135 | + """Test path with both spaces and non-ASCII characters.""" |
| 136 | + from pathlib import Path |
| 137 | + |
| 138 | + test_dir = tmp_path / "My Thalén Project" |
| 139 | + test_dir.mkdir() |
| 140 | + assert test_dir.exists() |
| 141 | + |
| 142 | + |
| 143 | +@pytest.mark.skipif( |
| 144 | + platform.system() != "Windows", reason="DLL loading and path encoding issue is Windows-specific" |
| 145 | +) |
| 146 | +class TestWindowsSpecificPathHandling: |
| 147 | + """ |
| 148 | + Windows-specific tests for path handling. |
| 149 | +
|
| 150 | + These tests verify Windows-specific behavior related to the fix. |
| 151 | + """ |
| 152 | + |
| 153 | + def test_module_loads_on_windows(self): |
| 154 | + """Verify module loads correctly on Windows.""" |
| 155 | + import mssql_python |
| 156 | + |
| 157 | + # If we get here, LoadLibraryW succeeded for: |
| 158 | + # - msodbcsql18.dll |
| 159 | + # - mssql-auth.dll (if exists) |
| 160 | + assert mssql_python.ddbc_bindings is not None |
| 161 | + |
| 162 | + def test_libs_directory_exists(self): |
| 163 | + """Verify the libs/windows directory structure exists.""" |
| 164 | + from pathlib import Path |
| 165 | + |
| 166 | + module_dir = Path(mssql_python.__file__).parent |
| 167 | + libs_dir = module_dir / "libs" / "windows" |
| 168 | + |
| 169 | + # Check that at least one architecture directory exists |
| 170 | + arch_dirs = ["x64", "x86", "arm64"] |
| 171 | + found_arch = any((libs_dir / arch).exists() for arch in arch_dirs) |
| 172 | + assert found_arch, f"No architecture directory found in {libs_dir}" |
| 173 | + |
| 174 | + def test_auth_dll_exists_if_libs_present(self): |
| 175 | + """Verify mssql-auth.dll exists in the libs directory.""" |
| 176 | + from pathlib import Path |
| 177 | + import struct |
| 178 | + |
| 179 | + module_dir = Path(mssql_python.__file__).parent |
| 180 | + |
| 181 | + # Determine architecture |
| 182 | + arch = "x64" if struct.calcsize("P") * 8 == 64 else "x86" |
| 183 | + # Check for ARM64 |
| 184 | + |
| 185 | + if platform.machine().lower() in ("arm64", "aarch64"): |
| 186 | + arch = "arm64" |
| 187 | + |
| 188 | + auth_dll = module_dir / "libs" / "windows" / arch / "mssql-auth.dll" |
| 189 | + |
| 190 | + if auth_dll.parent.exists(): |
| 191 | + # If the directory exists, the DLL should be there |
| 192 | + assert auth_dll.exists(), f"mssql-auth.dll not found at {auth_dll}" |
| 193 | + |
| 194 | + |
| 195 | +class TestPathEncodingEdgeCases: |
| 196 | + """Test edge cases in path encoding handling.""" |
| 197 | + |
| 198 | + def test_ascii_only_path_still_works(self): |
| 199 | + """Verify ASCII-only paths continue to work (regression test).""" |
| 200 | + # If we got here, module loaded successfully |
| 201 | + assert mssql_python is not None |
| 202 | + |
| 203 | + def test_path_with_spaces(self): |
| 204 | + """Verify paths with spaces work (common Windows scenario).""" |
| 205 | + # Common Windows paths like "Program Files" have spaces |
| 206 | + # Module should load regardless |
| 207 | + assert mssql_python.__file__ is not None |
| 208 | + |
| 209 | + def test_very_long_path_component(self, tmp_path): |
| 210 | + """Test handling of long path components.""" |
| 211 | + from pathlib import Path |
| 212 | + |
| 213 | + # Windows MAX_PATH is 260, but individual components can be up to 255 |
| 214 | + long_name = "a" * 200 |
| 215 | + test_dir = tmp_path / long_name |
| 216 | + test_dir.mkdir() |
| 217 | + assert test_dir.exists() |
| 218 | + |
| 219 | + @pytest.mark.parametrize( |
| 220 | + "char", |
| 221 | + [ |
| 222 | + "é", |
| 223 | + "ñ", |
| 224 | + "ü", |
| 225 | + "ö", |
| 226 | + "å", |
| 227 | + "ø", |
| 228 | + "æ", # European diacritics |
| 229 | + "中", |
| 230 | + "日", |
| 231 | + "한", # CJK ideographs |
| 232 | + "α", |
| 233 | + "β", |
| 234 | + "γ", # Greek letters |
| 235 | + "й", |
| 236 | + "ж", |
| 237 | + "щ", # Cyrillic |
| 238 | + ], |
| 239 | + ) |
| 240 | + def test_individual_non_ascii_chars_utf8_roundtrip(self, char): |
| 241 | + """Test UTF-8 encoding roundtrip for individual non-ASCII characters.""" |
| 242 | + test_path = f"C:\\Users\\Test{char}User\\project" |
| 243 | + |
| 244 | + # UTF-8 roundtrip |
| 245 | + encoded = test_path.encode("utf-8") |
| 246 | + decoded = encoded.decode("utf-8") |
| 247 | + assert decoded == test_path |
| 248 | + assert char in decoded |
| 249 | + |
| 250 | + def test_emoji_in_path(self, tmp_path): |
| 251 | + """Test path with emoji characters (supplementary plane).""" |
| 252 | + from pathlib import Path |
| 253 | + |
| 254 | + # Emoji are in the supplementary planes (> U+FFFF) |
| 255 | + # This tests 4-byte UTF-8 sequences |
| 256 | + try: |
| 257 | + emoji_dir = tmp_path / "test_🚀_project" |
| 258 | + emoji_dir.mkdir() |
| 259 | + assert emoji_dir.exists() |
| 260 | + except OSError: |
| 261 | + # Some filesystems don't support emoji in filenames |
| 262 | + pytest.skip("Filesystem doesn't support emoji in filenames") |
| 263 | + |
| 264 | + def test_mixed_scripts_in_path(self, tmp_path): |
| 265 | + """Test path with mixed scripts (Latin + CJK + Cyrillic).""" |
| 266 | + from pathlib import Path |
| 267 | + |
| 268 | + mixed_name = "Project_项目_Проект" |
| 269 | + test_dir = tmp_path / mixed_name |
| 270 | + test_dir.mkdir() |
| 271 | + assert test_dir.exists() |
0 commit comments