Skip to content

Commit a92e9d9

Browse files
committed
Merge remote-tracking branch 'origin/main' into feature/relationship-info-model
2 parents b7f7306 + ca5517e commit a92e9d9

9 files changed

Lines changed: 295 additions & 149 deletions

File tree

.claude/skills/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Use the PowerPlatform Dataverse Client Python SDK to interact with Microsoft Dat
2121
- `client.records` -- CRUD and OData queries
2222
- `client.query` -- query and search operations
2323
- `client.tables` -- table metadata, columns, and relationships
24+
- `client.files` -- file upload operations
2425

2526
### Bulk Operations
2627
The SDK supports Dataverse's native bulk operations: pass lists to `create()` and `update()` for automatic bulk processing; for `delete()`, set `use_bulk_delete` when passing lists to use the bulk operation.
@@ -301,11 +302,11 @@ client.tables.delete_relationship(result["relationship_id"])
301302

302303
```python
303304
# Upload file to a file column
304-
client.upload_file(
305-
table_schema_name="account",
305+
client.files.upload(
306+
table="account",
306307
record_id=account_id,
307-
file_name_attribute="new_Document", # If the file column doesn't exist, it will be created automatically
308-
path="/path/to/document.pdf"
308+
file_column="new_Document", # If the file column doesn't exist, it will be created automatically
309+
path="/path/to/document.pdf",
309310
)
310311
```
311312

README.md

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -112,8 +112,8 @@ The SDK provides a simple, pythonic interface for Dataverse operations:
112112

113113
| Concept | Description |
114114
|---------|-------------|
115-
| **DataverseClient** | Main entry point; provides `records`, `query`, and `tables` namespaces |
116-
| **Namespaces** | Operations are organized into `client.records` (CRUD & OData queries), `client.query` (query & search), and `client.tables` (metadata) |
115+
| **DataverseClient** | Main entry point; provides `records`, `query`, `tables`, and `files` namespaces |
116+
| **Namespaces** | Operations are organized into `client.records` (CRUD & OData queries), `client.query` (query & search), `client.tables` (metadata), and `client.files` (file uploads) |
117117
| **Records** | Dataverse records represented as Python dictionaries with column schema names |
118118
| **Schema names** | Use table schema names (`"account"`, `"new_MyTestTable"`) and column schema names (`"name"`, `"new_MyTestColumn"`). See: [Table definitions in Microsoft Dataverse](https://learn.microsoft.com/en-us/power-apps/developer/data-platform/entity-metadata) |
119119
| **Bulk Operations** | Efficient bulk processing for multiple records with automatic optimization |
@@ -378,11 +378,11 @@ result = client.tables.create_lookup_field(
378378

379379
```python
380380
# Upload a file to a record
381-
client.upload_file(
382-
table_schema_name="account",
383-
record_id=account_id,
384-
file_name_attribute="new_Document", # If the file column doesn't exist, it will be created automatically
385-
path="/path/to/document.pdf"
381+
client.files.upload(
382+
"account",
383+
account_id,
384+
"new_Document", # If the file column doesn't exist, it will be created automatically
385+
"/path/to/document.pdf",
386386
)
387387
```
388388

examples/advanced/file_upload.py

Lines changed: 41 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -89,68 +89,25 @@ def file_sha256(path: Path): # returns (hex_digest, size_bytes)
8989
return None, None
9090

9191

92-
def generate_test_pdf(size_mb: int = 10) -> Path:
93-
"""Generate a dummy PDF file of specified size for testing purposes."""
94-
try:
95-
from reportlab.pdfgen import canvas # type: ignore # noqa: WPS433
96-
from reportlab.lib.pagesizes import letter # type: ignore # noqa: WPS433
97-
except ImportError:
98-
# Fallback: generate a simple binary file with PDF headers
99-
test_file = Path(__file__).resolve().parent / f"test_dummy_{size_mb}mb.pdf"
100-
target_size = size_mb * 1024 * 1024
101-
102-
# Minimal PDF structure
103-
pdf_header = b"%PDF-1.4\n"
104-
pdf_body = b"1 0 obj\n<< /Type /Catalog /Pages 2 0 R >>\nendobj\n"
105-
pdf_body += b"2 0 obj\n<< /Type /Pages /Kids [3 0 R] /Count 1 >>\nendobj\n"
106-
pdf_body += b"3 0 obj\n<< /Type /Page /Parent 2 0 R /MediaBox [0 0 612 792] >>\nendobj\n"
107-
108-
# Fill with dummy data to reach target size
109-
current_size = len(pdf_header) + len(pdf_body)
110-
padding_needed = target_size - current_size - 50 # Reserve space for trailer
111-
padding = b"% " + (b"padding " * (padding_needed // 8))[:padding_needed] + b"\n"
112-
113-
pdf_trailer = b"xref\n0 4\ntrailer\n<< /Size 4 /Root 1 0 R >>\nstartxref\n0\n%%EOF\n"
114-
115-
with test_file.open("wb") as f:
116-
f.write(pdf_header)
117-
f.write(pdf_body)
118-
f.write(padding)
119-
f.write(pdf_trailer)
120-
121-
print({"test_pdf_generated": str(test_file), "size_mb": test_file.stat().st_size / (1024 * 1024)})
122-
return test_file
123-
124-
# ReportLab available - generate proper PDF
125-
test_file = Path(__file__).resolve().parent / f"test_dummy_{size_mb}mb.pdf"
126-
c = canvas.Canvas(str(test_file), pagesize=letter)
127-
128-
# Add pages with content until we reach target size
129-
target_size = size_mb * 1024 * 1024
130-
page_num = 0
131-
132-
while test_file.exists() is False or test_file.stat().st_size < target_size:
133-
page_num += 1
134-
c.drawString(100, 750, f"Test PDF - Page {page_num}")
135-
c.drawString(100, 730, f"Generated for file upload testing")
92+
def generate_test_file(size_mb: int = 10) -> Path:
93+
"""Generate a dummy text file of specified size for testing purposes.
13694
137-
# Add some text to increase file size
138-
for i in range(50):
139-
c.drawString(50, 700 - (i * 12), f"Line {i}: " + "Sample text content " * 20)
140-
141-
c.showPage()
142-
143-
# Save periodically to check size
144-
if page_num % 10 == 0:
145-
c.save()
146-
if test_file.stat().st_size >= target_size:
147-
break
148-
c = canvas.Canvas(str(test_file), pagesize=letter)
95+
Creates a plain text file with repeating content to reach the target
96+
size. No external dependencies required.
97+
"""
98+
test_file = Path(__file__).resolve().parent / f"test_dummy_{size_mb}mb.txt"
99+
target_size = size_mb * 1024 * 1024
149100

150-
if not test_file.exists() or test_file.stat().st_size < target_size:
151-
c.save()
101+
line = b"The quick brown fox jumps over the lazy dog. " * 2 + b"\n"
102+
with test_file.open("wb") as f:
103+
written = 0
104+
while written < target_size:
105+
chunk = line * min(1000, (target_size - written) // len(line) + 1)
106+
chunk = chunk[: target_size - written]
107+
f.write(chunk)
108+
written += len(chunk)
152109

153-
print({"test_pdf_generated": str(test_file), "size_mb": test_file.stat().st_size / (1024 * 1024)})
110+
print({"test_file_generated": str(test_file), "size_mb": test_file.stat().st_size / (1024 * 1024)})
154111
return test_file
155112

156113

@@ -228,8 +185,8 @@ def ensure_table():
228185

229186
# --------------------------- Shared dataset helpers ---------------------------
230187
_DATASET_INFO_CACHE = {} # cache dict: file_path -> (path, size_bytes, sha256_hex)
231-
_GENERATED_TEST_FILE = generate_test_pdf(10) # track generated file for cleanup
232-
_GENERATED_TEST_FILE_8MB = generate_test_pdf(8) # track 8MB replacement file for cleanup
188+
_GENERATED_TEST_FILE = generate_test_file(10) # track generated file for cleanup
189+
_GENERATED_TEST_FILE_8MB = generate_test_file(8) # track 8MB replacement file for cleanup
233190

234191

235192
def get_dataset_info(file_path: Path):
@@ -248,11 +205,11 @@ def get_dataset_info(file_path: Path):
248205
try:
249206
DATASET_FILE, small_file_size, src_hash = get_dataset_info(_GENERATED_TEST_FILE)
250207
backoff(
251-
lambda: client.upload_file(
252-
table_schema_name,
253-
record_id,
254-
small_file_attr_schema,
255-
str(DATASET_FILE),
208+
lambda: client.files.upload(
209+
table=table_schema_name,
210+
record_id=record_id,
211+
file_column=small_file_attr_schema,
212+
path=str(DATASET_FILE),
256213
mode="small",
257214
)
258215
)
@@ -282,12 +239,13 @@ def get_dataset_info(file_path: Path):
282239
print("Small single-request upload demo - REPLACE with 8MB file:")
283240
replacement_file, replace_size_small, replace_hash_small = get_dataset_info(_GENERATED_TEST_FILE_8MB)
284241
backoff(
285-
lambda: client.upload_file(
286-
table_schema_name,
287-
record_id,
288-
small_file_attr_schema,
289-
str(replacement_file),
242+
lambda: client.files.upload(
243+
table=table_schema_name,
244+
record_id=record_id,
245+
file_column=small_file_attr_schema,
246+
path=str(replacement_file),
290247
mode="small",
248+
if_none_match=False,
291249
)
292250
)
293251
print({"small_replace_upload_completed": True, "small_replace_source_size": replace_size_small})
@@ -316,15 +274,15 @@ def get_dataset_info(file_path: Path):
316274

317275
# --------------------------- Chunk (streaming) upload demo ---------------------------
318276
if run_chunk:
319-
print("Streaming chunk upload demo (upload_file_chunk):")
277+
print("Streaming chunk upload demo (mode='chunk'):")
320278
try:
321279
DATASET_FILE, src_size_chunk, src_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE)
322280
backoff(
323-
lambda: client.upload_file(
324-
table_schema_name,
325-
record_id,
326-
chunk_file_attr_schema,
327-
str(DATASET_FILE),
281+
lambda: client.files.upload(
282+
table=table_schema_name,
283+
record_id=record_id,
284+
file_column=chunk_file_attr_schema,
285+
path=str(DATASET_FILE),
328286
mode="chunk",
329287
)
330288
)
@@ -351,12 +309,13 @@ def get_dataset_info(file_path: Path):
351309
print("Streaming chunk upload demo - REPLACE with 8MB file:")
352310
replacement_file, replace_size_chunk, replace_hash_chunk = get_dataset_info(_GENERATED_TEST_FILE_8MB)
353311
backoff(
354-
lambda: client.upload_file(
355-
table_schema_name,
356-
record_id,
357-
chunk_file_attr_schema,
358-
str(replacement_file),
312+
lambda: client.files.upload(
313+
table=table_schema_name,
314+
record_id=record_id,
315+
file_column=chunk_file_attr_schema,
316+
path=str(replacement_file),
359317
mode="chunk",
318+
if_none_match=False,
360319
)
361320
)
362321
print({"chunk_replace_upload_completed": True})

examples/basic/installation_example.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
from PowerPlatform.Dataverse.operations.records import RecordOperations
6464
from PowerPlatform.Dataverse.operations.query import QueryOperations
6565
from PowerPlatform.Dataverse.operations.tables import TableOperations
66+
from PowerPlatform.Dataverse.operations.files import FileOperations
6667

6768

6869
def validate_imports():
@@ -123,17 +124,19 @@ def validate_client_methods(DataverseClient):
123124
print("\nValidating Client Methods...")
124125
print("-" * 50)
125126

126-
# Validate namespace API: client.records, client.query, client.tables
127+
# Validate namespace API: client.records, client.query, client.tables, client.files
127128
expected_namespaces = {
128129
"records": ["create", "get", "update", "delete"],
129-
"query": ["get", "sql"],
130+
"query": ["sql"],
130131
"tables": ["create", "get", "list", "delete", "add_columns", "remove_columns"],
132+
"files": ["upload"],
131133
}
132134

133135
ns_classes = {
134136
"records": RecordOperations,
135137
"query": QueryOperations,
136138
"tables": TableOperations,
139+
"files": FileOperations,
137140
}
138141

139142
missing_methods = []

src/PowerPlatform/Dataverse/claude_skill/dataverse-sdk-use/SKILL.md

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ Use the PowerPlatform Dataverse Client Python SDK to interact with Microsoft Dat
2121
- `client.records` -- CRUD and OData queries
2222
- `client.query` -- query and search operations
2323
- `client.tables` -- table metadata, columns, and relationships
24+
- `client.files` -- file upload operations
2425

2526
### Bulk Operations
2627
The SDK supports Dataverse's native bulk operations: pass lists to `create()` and `update()` for automatic bulk processing; for `delete()`, set `use_bulk_delete` when passing lists to use the bulk operation.
@@ -301,11 +302,11 @@ client.tables.delete_relationship(result["relationship_id"])
301302

302303
```python
303304
# Upload file to a file column
304-
client.upload_file(
305-
table_schema_name="account",
305+
client.files.upload(
306+
table="account",
306307
record_id=account_id,
307-
file_name_attribute="new_Document", # If the file column doesn't exist, it will be created automatically
308-
path="/path/to/document.pdf"
308+
file_column="new_Document", # If the file column doesn't exist, it will be created automatically
309+
path="/path/to/document.pdf",
309310
)
310311
```
311312

src/PowerPlatform/Dataverse/client.py

Lines changed: 26 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
from .data._odata import _ODataClient
1515
from .operations.records import RecordOperations
1616
from .operations.query import QueryOperations
17+
from .operations.files import FileOperations
1718
from .operations.tables import TableOperations
1819

1920

@@ -56,6 +57,7 @@ class DataverseClient:
5657
- ``client.records`` -- create, update, delete, and get records (single or paginated queries)
5758
- ``client.query`` -- query and search operations
5859
- ``client.tables`` -- table and column metadata management
60+
- ``client.files`` -- file upload operations
5961
6062
Example:
6163
Create a client and perform basic operations::
@@ -101,6 +103,7 @@ def __init__(
101103
self.records = RecordOperations(self)
102104
self.query = QueryOperations(self)
103105
self.tables = TableOperations(self)
106+
self.files = FileOperations(self)
104107

105108
def _get_odata(self) -> _ODataClient:
106109
"""
@@ -665,67 +668,41 @@ def upload_file(
665668
if_none_match: bool = True,
666669
) -> None:
667670
"""
671+
.. note::
672+
Deprecated. Use :meth:`~PowerPlatform.Dataverse.operations.files.FileOperations.upload` instead.
673+
668674
Upload a file to a Dataverse file column.
669675
670-
:param table_schema_name: Schema name of the table, e.g. ``"account"`` or ``"new_MyTestTable"``.
676+
:param table_schema_name: Schema name of the table.
671677
:type table_schema_name: :class:`str`
672678
:param record_id: GUID of the target record.
673679
:type record_id: :class:`str`
674-
:param file_name_attribute: Schema name of the file column attribute (e.g., ``"new_Document"``). If the column doesn't exist, it will be created automatically.
680+
:param file_name_attribute: Schema name of the file column attribute.
675681
:type file_name_attribute: :class:`str`
676-
:param path: Local filesystem path to the file. The stored filename will be
677-
the basename of this path.
682+
:param path: Local filesystem path to the file.
678683
:type path: :class:`str`
679684
:param mode: Upload strategy: ``"auto"`` (default), ``"small"``, or ``"chunk"``.
680-
Auto mode selects small or chunked upload based on file size.
681685
:type mode: :class:`str` or None
682-
:param mime_type: Explicit MIME type to store with the file (e.g. ``"application/pdf"``).
683-
If not provided, the MIME type may be inferred from the file extension.
686+
:param mime_type: Explicit MIME type to store with the file.
684687
:type mime_type: :class:`str` or None
685-
:param if_none_match: When True (default), sends ``If-None-Match: null`` header to only
686-
succeed if the column is currently empty. Set False to always overwrite using
687-
``If-Match: *``. Used for small and chunk modes only.
688+
:param if_none_match: When True (default), only succeed if the column is
689+
currently empty.
688690
:type if_none_match: :class:`bool`
689-
690-
:raises ~PowerPlatform.Dataverse.core.errors.HttpError: If the upload fails or the file column is not empty
691-
when ``if_none_match=True``.
692-
:raises FileNotFoundError: If the specified file path does not exist.
693-
694-
.. note::
695-
Large files are automatically chunked to avoid request size limits. The chunk mode performs multiple requests with resumable upload support.
696-
697-
Example:
698-
Upload a PDF file::
699-
700-
client.upload_file(
701-
table_schema_name="account",
702-
record_id=account_id,
703-
file_name_attribute="new_Contract",
704-
path="/path/to/contract.pdf",
705-
mime_type="application/pdf"
706-
)
707-
708-
Upload with auto mode selection::
709-
710-
client.upload_file(
711-
table_schema_name="email",
712-
record_id=email_id,
713-
file_name_attribute="new_Attachment",
714-
path="/path/to/large_file.zip",
715-
mode="auto"
716-
)
717691
"""
718-
with self._scoped_odata() as od:
719-
od._upload_file(
720-
table_schema_name,
721-
record_id,
722-
file_name_attribute,
723-
path,
724-
mode=mode,
725-
mime_type=mime_type,
726-
if_none_match=if_none_match,
727-
)
728-
return None
692+
warnings.warn(
693+
"client.upload_file() is deprecated. Use client.files.upload() instead.",
694+
DeprecationWarning,
695+
stacklevel=2,
696+
)
697+
self.files.upload(
698+
table_schema_name,
699+
record_id,
700+
file_name_attribute,
701+
path,
702+
mode=mode,
703+
mime_type=mime_type,
704+
if_none_match=if_none_match,
705+
)
729706

730707
# Cache utilities
731708
def flush_cache(self, kind) -> int:

0 commit comments

Comments
 (0)