diff --git a/README.md b/README.md index 551f6dcf..7f0c1409 100644 --- a/README.md +++ b/README.md @@ -47,6 +47,7 @@ A Python client library for Microsoft Dataverse that provides a unified interfac - **📦 Batch Operations**: Send multiple CRUD, table metadata, and SQL query operations in a single HTTP request with optional transactional changesets - **🔐 Azure Identity**: Built-in authentication using Azure Identity credential providers with comprehensive support - **🛡️ Error Handling**: Structured exception hierarchy with detailed error context and retry guidance +- **📋 HTTP Diagnostics Logging**: Opt-in file-based logging of all HTTP requests and responses with automatic redaction of sensitive headers (e.g. `Authorization`) ## Getting started @@ -672,6 +673,58 @@ For optimal performance in production environments: | **Production Credentials** | Use `ClientSecretCredential` or `CertificateCredential` for unattended operations | | **Error Handling** | Implement retry logic for transient errors (`e.is_transient`) | +### HTTP diagnostics logging + +Enable file-based HTTP logging to capture all requests and responses for debugging. Sensitive headers (e.g. `Authorization`) are automatically redacted. + +```python +from PowerPlatform.Dataverse.client import DataverseClient +from PowerPlatform.Dataverse.core.config import DataverseConfig +from PowerPlatform.Dataverse.core.log_config import LogConfig + +log_cfg = LogConfig( + log_folder="./my_logs", # Directory for log files (created if missing) + log_file_prefix="crm_debug", # Filename prefix; timestamp appended automatically + max_body_bytes=4096, # Bytes of body to capture per entry — 0 (default) disables body capture +) +config = DataverseConfig(log_config=log_cfg) +client = DataverseClient("https://yourorg.crm.dynamics.com", credential, config=config) +``` + +Each log file is timestamped and rotated automatically (default 10 MB per file, 5 backups). Sample output: + +``` +[2026-04-11T15:27:31-0700] DEBUG >>> REQUEST POST https://yourorg.crm.dynamics.com/api/data/v9.2/accounts + Authorization: [REDACTED] + Accept: application/json + Content-Type: application/json + OData-MaxVersion: 4.0 + OData-Version: 4.0 + User-Agent: DataverseSvcPythonClient:0.1.0b8 + x-ms-client-request-id: 7050c4d0-6bcc-48e3-a310-b4e8fa18ac69 + x-ms-correlation-id: 4cace77d-e4ee-4419-8c65-fc62beed6e71 + Body: {"name":"Contoso Ltd"} +[2026-04-11T15:27:31-0700] DEBUG <<< RESPONSE 204 POST https://yourorg.crm.dynamics.com/api/data/v9.2/accounts (78.0ms) + Content-Type: application/json; odata.metadata=minimal + OData-Version: 4.0 + x-ms-service-request-id: a6d0b6c4-5dd1-47cb-83eb-b6fccf754216 + x-ms-ratelimit-burst-remaining-xrm-requests: 7998 +``` + +> **Security note:** This feature is intended for development and debugging only. +> Log files are **plaintext** and may contain PII, sensitive business data, and +> Dataverse record IDs — even with `max_body_bytes=0` (the default), request URLs +> can include filter values and record identifiers. +> +> - **Never enable in production.** If required for production diagnostics, keep +> `max_body_bytes=0` and treat log files as regulated data under your organization's +> data handling policy. +> - **Restrict access.** Set file system permissions so only the process user can +> read log files. Use an encrypted volume or folder in sensitive environments. +> - **Control retention.** Log rotation keeps up to 5 files by default (`backup_count`). +> Delete logs after the debugging session; use secure deletion for regulated data. +> - **Prevent source control leaks.** Add the log folder to `.gitignore` immediately. + ### Limitations - SQL queries are **read-only** and support a limited subset of SQL syntax diff --git a/src/PowerPlatform/Dataverse/core/_http.py b/src/PowerPlatform/Dataverse/core/_http.py index 626dd52a..fe740888 100644 --- a/src/PowerPlatform/Dataverse/core/_http.py +++ b/src/PowerPlatform/Dataverse/core/_http.py @@ -12,10 +12,13 @@ from __future__ import annotations import time -from typing import Any, Optional +from typing import TYPE_CHECKING, Any, Optional import requests +if TYPE_CHECKING: + from ._http_logger import _HttpLogger + class _HttpClient: """ @@ -34,6 +37,9 @@ class _HttpClient: When provided, all requests use this session (enabling TCP/TLS reuse). When ``None``, each request uses ``requests.request()`` directly. :type session: :class:`requests.Session` | None + :param logger: Optional HTTP diagnostics logger. When provided, all requests, + responses, and transport errors are logged with automatic header redaction. + :type logger: ~PowerPlatform.Dataverse.core._http_logger._HttpLogger | None """ def __init__( @@ -42,11 +48,13 @@ def __init__( backoff: Optional[float] = None, timeout: Optional[float] = None, session: Optional[requests.Session] = None, + logger: Optional["_HttpLogger"] = None, ) -> None: self.max_attempts = retries if retries is not None else 5 self.base_delay = backoff if backoff is not None else 0.5 self.default_timeout: Optional[float] = timeout self._session = session + self._logger = logger def _request(self, method: str, url: str, **kwargs: Any) -> requests.Response: """ @@ -73,12 +81,52 @@ def _request(self, method: str, url: str, **kwargs: Any) -> requests.Response: m = (method or "").lower() kwargs["timeout"] = 120 if m in ("post", "delete") else 10 + # Log outbound request once (before retry loop). + # Use explicit key presence checks so falsy values (e.g. {}) are logged correctly. + if self._logger is not None: + if "json" in kwargs: + req_body = kwargs["json"] + elif "data" in kwargs: + req_body = kwargs["data"] + else: + req_body = None + self._logger.log_request( + method, + url, + headers=kwargs.get("headers"), + body=req_body, + ) + # Small backoff retry on network errors only requester = self._session.request if self._session is not None else requests.request for attempt in range(self.max_attempts): try: - return requester(method, url, **kwargs) - except requests.exceptions.RequestException: + t0 = time.monotonic() + resp = requester(method, url, **kwargs) + elapsed_ms = (time.monotonic() - t0) * 1000 + + if self._logger is not None: + # Only decode resp.text when body logging is enabled — avoids + # unnecessary overhead for large payloads when max_body_bytes == 0. + resp_body = resp.text if self._logger.body_logging_enabled else None + self._logger.log_response( + method, + url, + status_code=resp.status_code, + headers=dict(resp.headers), + body=resp_body, + elapsed_ms=elapsed_ms, + ) + return resp + except requests.exceptions.RequestException as exc: + if self._logger is not None: + self._logger.log_error( + method, + url, + exc, + attempt=attempt + 1, + max_attempts=self.max_attempts, + ) if attempt == self.max_attempts - 1: raise delay = self.base_delay * (2**attempt) diff --git a/src/PowerPlatform/Dataverse/core/_http_logger.py b/src/PowerPlatform/Dataverse/core/_http_logger.py new file mode 100644 index 00000000..3754c1fb --- /dev/null +++ b/src/PowerPlatform/Dataverse/core/_http_logger.py @@ -0,0 +1,169 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Internal HTTP logger that writes redacted request/response diagnostics to local files. +""" + +from __future__ import annotations + +import json as _json +import logging +import os +import uuid +from datetime import datetime, timezone +from logging.handlers import RotatingFileHandler +from typing import Any, Dict, Optional + +from .log_config import LogConfig + + +class _HttpLogger: + """Structured HTTP diagnostic logger with automatic header redaction.""" + + def __init__(self, config: LogConfig) -> None: + self._config = config + self._redacted = {h.lower() for h in config.redacted_headers} + + # Ensure folder exists + os.makedirs(config.log_folder, exist_ok=True) + + # Build timestamped filename — random suffix guarantees uniqueness even when + # multiple clients are created within the same microsecond (e.g. in tests). + ts = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S") + filename = f"{config.log_file_prefix}_{ts}_{uuid.uuid4().hex[:6]}.log" + filepath = os.path.join(config.log_folder, filename) + + # Create a dedicated named logger (not root) to avoid side effects + logger_name = f"PowerPlatform.Dataverse.http.{uuid.uuid4().hex[:8]}" + self._logger = logging.getLogger(logger_name) + self._logger.setLevel(getattr(logging, config.log_level.upper(), logging.DEBUG)) + self._logger.propagate = False # don't bubble to root + + self._handler = RotatingFileHandler( + filepath, + maxBytes=config.max_file_bytes, + backupCount=config.backup_count, + encoding="utf-8", + ) + formatter = logging.Formatter( + "[%(asctime)s] %(levelname)s %(message)s", + datefmt="%Y-%m-%dT%H:%M:%S%z", + ) + self._handler.setFormatter(formatter) + self._logger.addHandler(self._handler) + + # Write a one-time config summary so readers know what is and isn't captured. + body_status = ( + f"{config.max_body_bytes} bytes" + if config.max_body_bytes > 0 + else "0 (disabled — set max_body_bytes > 0 to enable)" + ) + redacted = ", ".join(sorted(config.redacted_headers)) + self._logger.debug( + "\n".join( + [ + "=== Dataverse HTTP Diagnostics ===", + f" log_level: {config.log_level.upper()}", + f" max_body_bytes: {body_status}", + f" redacted_headers: {redacted}", + "==================================", + ] + ) + ) + + def log_request( + self, + method: str, + url: str, + headers: Optional[Dict[str, str]] = None, + body: Any = None, + ) -> None: + """Log an outbound HTTP request.""" + safe_headers = self._redact_headers(headers or {}) + body_text = self._truncate_body(body) + lines = [f">>> REQUEST {method.upper()} {url}"] + lines += [f" {k}: {v}" for k, v in safe_headers.items()] + if body_text: + lines.append(f" Body: {body_text}") + self._logger.debug("\n".join(lines)) + + def log_response( + self, + method: str, + url: str, + status_code: int, + headers: Optional[Dict[str, str]] = None, + body: Any = None, + elapsed_ms: Optional[float] = None, + ) -> None: + """Log an inbound HTTP response.""" + safe_headers = self._redact_headers(headers or {}) + body_text = self._truncate_body(body) + elapsed_str = f" ({elapsed_ms:.1f}ms)" if elapsed_ms is not None else "" + lines = [f"<<< RESPONSE {status_code} {method.upper()} {url}{elapsed_str}"] + lines += [f" {k}: {v}" for k, v in safe_headers.items()] + if body_text: + lines.append(f" Body: {body_text}") + elif not self.body_logging_enabled: + # Body capture is disabled. Emit an explicit marker when the response + # has a Content-Length > 0 so readers know content exists but was not captured. + raw_headers = headers or {} + content_length = raw_headers.get("Content-Length") or raw_headers.get("content-length", "0") + try: + has_body = int(content_length) > 0 + except (ValueError, TypeError): + has_body = False + if has_body: + lines.append(" Body: [not captured — set max_body_bytes > 0 to enable]") + self._logger.debug("\n".join(lines)) + + def log_error( + self, + method: str, + url: str, + error: Exception, + attempt: Optional[int] = None, + max_attempts: Optional[int] = None, + ) -> None: + """Log an HTTP transport error.""" + attempt_str = f" [attempt {attempt}/{max_attempts}]" if attempt is not None and max_attempts is not None else "" + self._logger.error(f"!!! ERROR {method.upper()} {url}{attempt_str} - {type(error).__name__}: {error}") + + @property + def body_logging_enabled(self) -> bool: + """Return True if body capture is enabled (max_body_bytes > 0).""" + return self._config.max_body_bytes != 0 + + def close(self) -> None: + """Flush and close the underlying file handler. Safe to call multiple times.""" + self._handler.flush() + self._handler.close() + self._logger.removeHandler(self._handler) + + def _redact_headers(self, headers: Dict[str, str]) -> Dict[str, str]: + return {k: ("[REDACTED]" if k.lower() in self._redacted else v) for k, v in headers.items()} + + def _truncate_body(self, body: Any) -> str: + if body is None: + return "" + if isinstance(body, (bytes, bytearray)): + text = body.decode("utf-8", errors="replace") + elif not isinstance(body, str): + try: + text = _json.dumps(body, default=str, ensure_ascii=False) + except (TypeError, ValueError): + text = str(body) + else: + text = body + + limit = self._config.max_body_bytes + if limit == 0: + return "" + encoded = text.encode("utf-8") + if len(encoded) > limit: + # Truncate on byte boundary, then decode safely to avoid splitting + # multi-byte characters. Report the true byte length, not char count. + truncated = encoded[:limit].decode("utf-8", errors="ignore") + return truncated + f"... [truncated, {len(encoded)} bytes total]" + return text diff --git a/src/PowerPlatform/Dataverse/core/config.py b/src/PowerPlatform/Dataverse/core/config.py index 19c7a578..9c7c41f5 100644 --- a/src/PowerPlatform/Dataverse/core/config.py +++ b/src/PowerPlatform/Dataverse/core/config.py @@ -12,7 +12,10 @@ from __future__ import annotations from dataclasses import dataclass -from typing import Optional +from typing import TYPE_CHECKING, Optional + +if TYPE_CHECKING: + from .log_config import LogConfig @dataclass(frozen=True) @@ -28,6 +31,10 @@ class DataverseConfig: :type http_backoff: :class:`float` or None :param http_timeout: Optional request timeout in seconds. Reserved for future use. :type http_timeout: :class:`float` or None + :param log_config: Optional local HTTP diagnostics logging configuration. + When provided, all HTTP requests and responses are logged to timestamped + ``.log`` files with automatic redaction of sensitive headers. + :type log_config: ~PowerPlatform.Dataverse.core.log_config.LogConfig or None """ language_code: int = 1033 @@ -37,6 +44,8 @@ class DataverseConfig: http_backoff: Optional[float] = None http_timeout: Optional[float] = None + log_config: Optional["LogConfig"] = None + @classmethod def from_env(cls) -> "DataverseConfig": """ @@ -51,4 +60,5 @@ def from_env(cls) -> "DataverseConfig": http_retries=None, http_backoff=None, http_timeout=None, + log_config=None, ) diff --git a/src/PowerPlatform/Dataverse/core/log_config.py b/src/PowerPlatform/Dataverse/core/log_config.py new file mode 100644 index 00000000..73100703 --- /dev/null +++ b/src/PowerPlatform/Dataverse/core/log_config.py @@ -0,0 +1,75 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +""" +Local file logging configuration for Dataverse SDK HTTP diagnostics. + +Provides :class:`LogConfig`, an opt-in configuration for writing request/response +traces to ``.log`` files with automatic header redaction and timestamped filenames. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import FrozenSet + +_VALID_LOG_LEVELS: FrozenSet[str] = frozenset({"DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"}) + +__all__ = ["LogConfig"] + +# Headers whose values must never appear in log files +_DEFAULT_REDACTED_HEADERS: FrozenSet[str] = frozenset( + { + "authorization", + "proxy-authorization", + "x-ms-authorization-auxiliary", + "ocp-apim-subscription-key", + } +) + + +def _default_redacted_headers() -> FrozenSet[str]: + return _DEFAULT_REDACTED_HEADERS + + +@dataclass(frozen=True) +class LogConfig: + """ + Configuration for local HTTP diagnostics logging. + + When provided to :class:`~PowerPlatform.Dataverse.client.DataverseClient` via + :class:`~PowerPlatform.Dataverse.core.config.DataverseConfig`, every HTTP request + and response is logged to timestamped ``.log`` files in the specified folder. + Sensitive headers (e.g. ``Authorization``) are automatically redacted. + + :param log_folder: Directory path for log files. Created automatically if missing. + Default: ``"./dataverse_logs"`` + :param log_file_prefix: Filename prefix. Timestamp is appended automatically. + Default: ``"dataverse"`` → ``dataverse_20260310_143022.log`` + :param max_body_bytes: Maximum bytes of request/response body to capture. + ``0`` (default) disables body capture. Enable only for active debugging + sessions — bodies may contain PII and sensitive business data. + :param redacted_headers: Header names (case-insensitive) whose values are + replaced with ``"[REDACTED]"`` in logs. Defaults include + ``Authorization``, ``Proxy-Authorization``, etc. + :param log_level: Python logging level name. Default: ``"DEBUG"``. + :param max_file_bytes: Max size per log file before rotation (bytes). + Default: ``10_485_760`` (10 MB). + :param backup_count: Number of rotated backup files to keep. Default: ``5``. + """ + + def __post_init__(self) -> None: + if self.log_level.upper() not in _VALID_LOG_LEVELS: + raise ValueError( + f"Invalid log_level {self.log_level!r}. " f"Must be one of: {', '.join(sorted(_VALID_LOG_LEVELS))}." + ) + + log_folder: str = "./dataverse_logs" + log_file_prefix: str = "dataverse" + max_body_bytes: int = 0 # Body capture disabled by default — opt-in only. Request URLs + # are always logged and may contain filter values and record identifiers. + # Enable only for active debugging sessions and treat log files as regulated data. + redacted_headers: FrozenSet[str] = field(default_factory=_default_redacted_headers) + log_level: str = "DEBUG" + max_file_bytes: int = 10_485_760 # 10 MB + backup_count: int = 5 diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py index 3ec490d3..54ac22ca 100644 --- a/src/PowerPlatform/Dataverse/data/_odata.py +++ b/src/PowerPlatform/Dataverse/data/_odata.py @@ -190,13 +190,18 @@ def __init__( "PowerPlatform.Dataverse.core.config", fromlist=["DataverseConfig"] ).DataverseConfig.from_env() ) + self._http_logger = None + if self.config.log_config is not None: + from ..core._http_logger import _HttpLogger + + self._http_logger = _HttpLogger(self.config.log_config) self._http = _HttpClient( retries=self.config.http_retries, backoff=self.config.http_backoff, timeout=self.config.http_timeout, session=session, + logger=self._http_logger, ) - # Cache: normalized table_schema_name (lowercase) -> entity set name (plural) resolved from metadata self._logical_to_entityset_cache: dict[str, str] = {} # Cache: normalized table_schema_name (lowercase) -> primary id attribute (e.g. accountid) self._logical_primaryid_cache: dict[str, str] = {} @@ -224,6 +229,9 @@ def close(self) -> None: self._picklist_label_cache.clear() if self._http is not None: self._http.close() + if self._http_logger is not None: + self._http_logger.close() + self._http_logger = None def _headers(self) -> Dict[str, str]: """Build standard OData headers with bearer auth.""" diff --git a/tests/unit/core/test_http_logger.py b/tests/unit/core/test_http_logger.py new file mode 100644 index 00000000..aae6e966 --- /dev/null +++ b/tests/unit/core/test_http_logger.py @@ -0,0 +1,597 @@ +# Copyright (c) Microsoft Corporation. +# Licensed under the MIT license. + +"""Unit tests for _HttpLogger and LogConfig.""" + +from __future__ import annotations + +import dataclasses +import os +import re + +import pytest + +from PowerPlatform.Dataverse.core._http_logger import _HttpLogger +from PowerPlatform.Dataverse.core.config import DataverseConfig +from PowerPlatform.Dataverse.core.log_config import LogConfig + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _make_logger(tmp_path, **kwargs) -> _HttpLogger: + """Create an _HttpLogger writing to a temp directory.""" + cfg = LogConfig(log_folder=str(tmp_path), **kwargs) + return _HttpLogger(cfg) + + +def _read_log(tmp_path) -> str: + """Return the concatenated content of all .log files in tmp_path.""" + parts = [] + for fname in sorted(os.listdir(tmp_path)): + if fname.endswith(".log"): + with open(os.path.join(tmp_path, fname), encoding="utf-8") as fh: + parts.append(fh.read()) + return "".join(parts) + + +# --------------------------------------------------------------------------- +# LogConfig defaults +# --------------------------------------------------------------------------- + + +def test_log_config_defaults(): + cfg = LogConfig() + assert cfg.log_folder == "./dataverse_logs" + assert cfg.log_file_prefix == "dataverse" + assert cfg.max_body_bytes == 0 + assert cfg.log_level == "DEBUG" + assert cfg.max_file_bytes == 10_485_760 + assert cfg.backup_count == 5 + assert "authorization" in cfg.redacted_headers + assert "proxy-authorization" in cfg.redacted_headers + + +def test_log_config_frozen(): + cfg = LogConfig() + with pytest.raises(dataclasses.FrozenInstanceError): + cfg.log_folder = "/other" # type: ignore[misc] + + +def test_log_config_invalid_log_level_raises(): + """An unrecognised log_level must raise ValueError at construction time.""" + with pytest.raises(ValueError, match="Invalid log_level"): + LogConfig(log_level="DEBG") + + +def test_log_config_valid_log_levels(): + """All standard Python log level names must be accepted.""" + for level in ("DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"): + cfg = LogConfig(log_level=level) + assert cfg.log_level == level + + +def test_log_config_log_level_case_insensitive(): + """log_level validation must be case-insensitive.""" + cfg = LogConfig(log_level="debug") + assert cfg.log_level == "debug" + + +# --------------------------------------------------------------------------- +# Session config summary header +# --------------------------------------------------------------------------- + + +def test_session_header_written_on_init(tmp_path): + """A config summary block must be written to the log when the logger is created.""" + _make_logger(tmp_path) + content = _read_log(tmp_path) + assert "=== Dataverse HTTP Diagnostics ===" in content + assert "==================================\n" in content or "==================================" in content + + +def test_session_header_shows_body_disabled(tmp_path): + """When max_body_bytes=0 the header must say body capture is disabled.""" + _make_logger(tmp_path) # default is 0 + content = _read_log(tmp_path) + assert "disabled" in content + assert "max_body_bytes" in content + + +def test_session_header_shows_body_limit_when_enabled(tmp_path): + """When max_body_bytes > 0 the header must show the byte limit.""" + _make_logger(tmp_path, max_body_bytes=4096) + content = _read_log(tmp_path) + assert "4096 bytes" in content + + +def test_session_header_shows_redacted_headers(tmp_path): + """The header must list the configured redacted headers.""" + _make_logger(tmp_path) + content = _read_log(tmp_path) + assert "authorization" in content + assert "redacted_headers" in content + + +# --------------------------------------------------------------------------- +# Log file creation +# --------------------------------------------------------------------------- + + +def test_log_file_created(tmp_path): + _make_logger(tmp_path) + log_files = [f for f in os.listdir(tmp_path) if f.endswith(".log")] + assert len(log_files) == 1 + # File should match: _YYYYMMDD_HHMMSS_<6-char hex>.log + assert re.match(r"dataverse_\d{8}_\d{6}_[0-9a-f]{6}\.log", log_files[0]) + + +def test_log_file_custom_prefix(tmp_path): + _make_logger(tmp_path, log_file_prefix="crm_debug") + log_files = [f for f in os.listdir(tmp_path) if f.endswith(".log")] + assert log_files[0].startswith("crm_debug_") + + +def test_log_folder_created_if_missing(tmp_path): + nested = os.path.join(str(tmp_path), "deep", "nested") + cfg = LogConfig(log_folder=nested) + _HttpLogger(cfg) + assert os.path.isdir(nested) + + +# --------------------------------------------------------------------------- +# Header redaction +# --------------------------------------------------------------------------- + + +def test_authorization_header_redacted(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com", headers={"Authorization": "Bearer secret123"}) + content = _read_log(tmp_path) + assert "secret123" not in content + assert "[REDACTED]" in content + + +def test_proxy_authorization_redacted(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com", headers={"Proxy-Authorization": "Basic xyz"}) + content = _read_log(tmp_path) + assert "xyz" not in content + assert "[REDACTED]" in content + + +def test_safe_headers_not_redacted(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com", headers={"Content-Type": "application/json"}) + content = _read_log(tmp_path) + assert "application/json" in content + + +def test_headers_formatted_one_per_line(tmp_path): + """Each header must appear on its own line as 'key: value', not as a dict repr.""" + logger = _make_logger(tmp_path) + logger.log_request( + "GET", + "https://example.com", + headers={"Accept": "application/json", "OData-Version": "4.0"}, + ) + content = _read_log(tmp_path) + assert " Accept: application/json" in content + assert " OData-Version: 4.0" in content + # Old dict format must not be present + assert "Headers: {" not in content + + +def test_case_insensitive_redaction(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com", headers={"AUTHORIZATION": "Bearer token"}) + content = _read_log(tmp_path) + assert "Bearer token" not in content + assert "[REDACTED]" in content + + +def test_custom_redacted_headers(tmp_path): + cfg = LogConfig(log_folder=str(tmp_path), redacted_headers=frozenset({"x-custom-secret"})) + logger = _HttpLogger(cfg) + logger.log_request("GET", "https://example.com", headers={"X-Custom-Secret": "my-secret"}) + content = _read_log(tmp_path) + assert "my-secret" not in content + assert "[REDACTED]" in content + + +# --------------------------------------------------------------------------- +# Body truncation +# --------------------------------------------------------------------------- + + +def test_body_truncation(tmp_path): + logger = _make_logger(tmp_path, max_body_bytes=10) + logger.log_request("POST", "https://example.com", body="A" * 100) + content = _read_log(tmp_path) + assert "truncated" in content + assert "100 bytes total" in content + + +def test_body_not_truncated_when_under_limit(tmp_path): + logger = _make_logger(tmp_path, max_body_bytes=200) + logger.log_request("POST", "https://example.com", body="hello world") + content = _read_log(tmp_path) + assert "hello world" in content + assert "truncated" not in content + + +def test_zero_max_body_bytes_disables_body(tmp_path): + logger = _make_logger(tmp_path, max_body_bytes=0) + logger.log_request("POST", "https://example.com", body="should not appear") + content = _read_log(tmp_path) + assert "should not appear" not in content + + +def test_bytes_body_decoded(tmp_path): + logger = _make_logger(tmp_path, max_body_bytes=1024) + logger.log_request("POST", "https://example.com", body=b"binary content") + content = _read_log(tmp_path) + assert "binary content" in content + + +def test_dict_body_serialized(tmp_path): + logger = _make_logger(tmp_path, max_body_bytes=1024) + logger.log_request("POST", "https://example.com", body={"name": "Contoso"}) + content = _read_log(tmp_path) + assert "Contoso" in content + + +# --------------------------------------------------------------------------- +# log_request +# --------------------------------------------------------------------------- + + +def test_log_request_contains_method_and_url(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("POST", "https://example.crm.dynamics.com/api/data/v9.2/accounts") + content = _read_log(tmp_path) + assert ">>> REQUEST" in content + assert "POST" in content + assert "https://example.crm.dynamics.com/api/data/v9.2/accounts" in content + + +def test_log_request_no_body_no_body_line(tmp_path): + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com", body=None) + content = _read_log(tmp_path) + assert "Body:" not in content + + +# --------------------------------------------------------------------------- +# log_response +# --------------------------------------------------------------------------- + + +def test_log_response_contains_status_and_elapsed(tmp_path): + logger = _make_logger(tmp_path) + logger.log_response("GET", "https://example.com", status_code=200, elapsed_ms=123.4) + content = _read_log(tmp_path) + assert "<<< RESPONSE" in content + assert "200" in content + assert "123.4ms" in content + + +def test_log_response_no_elapsed_when_none(tmp_path): + logger = _make_logger(tmp_path) + logger.log_response("GET", "https://example.com", status_code=404, elapsed_ms=None) + content = _read_log(tmp_path) + assert "<<< RESPONSE" in content + assert "ms)" not in content + + +def test_log_response_not_captured_marker_when_body_disabled_and_content_exists(tmp_path): + """When body logging is disabled and Content-Length > 0, emit [not captured] marker.""" + logger = _make_logger(tmp_path) # max_body_bytes=0 by default + logger.log_response( + "GET", + "https://example.com", + status_code=200, + headers={"Content-Length": "230"}, + body=None, + ) + content = _read_log(tmp_path) + assert "[not captured" in content + + +def test_log_response_no_marker_when_body_disabled_and_no_content(tmp_path): + """When body logging is disabled and Content-Length is 0 (e.g. 204), no marker is shown.""" + logger = _make_logger(tmp_path) + logger.log_response( + "DELETE", + "https://example.com/resource", + status_code=204, + headers={"Content-Length": "0"}, + body=None, + ) + content = _read_log(tmp_path) + assert "not captured" not in content + assert "Body" not in content + + +def test_log_response_no_marker_when_body_enabled(tmp_path): + """When body logging is enabled, the [not captured] marker must never appear.""" + logger = _make_logger(tmp_path, max_body_bytes=4096) + logger.log_response( + "GET", + "https://example.com", + status_code=200, + headers={"Content-Length": "500"}, + body='{"value": []}', + ) + content = _read_log(tmp_path) + assert "not captured" not in content + assert '{"value": []}' in content + + +# --------------------------------------------------------------------------- +# log_error +# --------------------------------------------------------------------------- + + +def test_log_error_writes_error_entry(tmp_path): + logger = _make_logger(tmp_path) + logger.log_error("DELETE", "https://example.com/resource", ValueError("connection refused")) + content = _read_log(tmp_path) + assert "!!! ERROR" in content + assert "DELETE" in content + assert "ValueError" in content + assert "connection refused" in content + + +def test_log_error_includes_attempt_info(tmp_path): + """log_error must include attempt/max_attempts when provided.""" + logger = _make_logger(tmp_path) + logger.log_error( + "GET", + "https://example.com", + ConnectionError("timeout"), + attempt=2, + max_attempts=5, + ) + content = _read_log(tmp_path) + assert "[attempt 2/5]" in content + + +def test_log_error_no_attempt_info_when_omitted(tmp_path): + """log_error without attempt args must not include attempt info.""" + logger = _make_logger(tmp_path) + logger.log_error("GET", "https://example.com", ConnectionError("timeout")) + content = _read_log(tmp_path) + assert "attempt" not in content + + +# --------------------------------------------------------------------------- +# body_logging_enabled property +# --------------------------------------------------------------------------- + + +def test_body_logging_enabled_false_by_default(tmp_path): + """body_logging_enabled must be False when max_body_bytes=0 (the default).""" + logger = _make_logger(tmp_path) + assert logger.body_logging_enabled is False + + +def test_body_logging_enabled_true_when_set(tmp_path): + """body_logging_enabled must be True when max_body_bytes > 0.""" + logger = _make_logger(tmp_path, max_body_bytes=4096) + assert logger.body_logging_enabled is True + + +# --------------------------------------------------------------------------- +# Integration: _HttpClient with logger=None (no errors) +# --------------------------------------------------------------------------- + + +def test_http_client_no_logger_no_errors(): + from unittest.mock import MagicMock + + from PowerPlatform.Dataverse.core._http import _HttpClient + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {} + mock_resp.text = "" + + session = MagicMock() + session.request.return_value = mock_resp + + client = _HttpClient(session=session, logger=None) + resp = client._request("GET", "https://example.com") + assert resp.status_code == 200 + + +def test_http_client_with_logger_logs_request_and_response(tmp_path): + from unittest.mock import MagicMock + + from PowerPlatform.Dataverse.core._http import _HttpClient + + mock_resp = MagicMock() + mock_resp.status_code = 201 + mock_resp.headers = {"Content-Type": "application/json"} + mock_resp.text = '{"value": "ok"}' + + session = MagicMock() + session.request.return_value = mock_resp + + cfg = LogConfig(log_folder=str(tmp_path)) + http_logger = _HttpLogger(cfg) + client = _HttpClient(session=session, logger=http_logger) + client._request("POST", "https://example.com/api/data/v9.2/accounts", json={"name": "Test"}) + + content = _read_log(tmp_path) + assert ">>> REQUEST" in content + assert "POST" in content + assert "<<< RESPONSE" in content + assert "201" in content + + +def test_http_client_logs_attempt_number_on_retry(tmp_path): + """_HttpClient must pass attempt number and max_attempts to log_error on each retry.""" + import requests as req_lib + + from unittest.mock import MagicMock, patch + + from PowerPlatform.Dataverse.core._http import _HttpClient + + session = MagicMock() + session.request.side_effect = req_lib.exceptions.ConnectionError("timeout") + + cfg = LogConfig(log_folder=str(tmp_path)) + http_logger = _HttpLogger(cfg) + client = _HttpClient(retries=2, backoff=0, session=session, logger=http_logger) + + with patch("time.sleep"): # skip backoff delay + with pytest.raises(req_lib.exceptions.ConnectionError): + client._request("GET", "https://example.com") + + content = _read_log(tmp_path) + assert "[attempt 1/2]" in content + assert "[attempt 2/2]" in content + + +# --------------------------------------------------------------------------- +# Integration: DataverseConfig with log_config +# --------------------------------------------------------------------------- + + +def test_dataverse_config_log_config_field(tmp_path): + cfg = LogConfig(log_folder=str(tmp_path)) + dc = DataverseConfig(log_config=cfg) + assert dc.log_config is cfg + + +def test_dataverse_config_log_config_default_is_none(): + dc = DataverseConfig() + assert dc.log_config is None + + +def test_dataverse_config_from_env_log_config_none(): + dc = DataverseConfig.from_env() + assert dc.log_config is None + + +# --------------------------------------------------------------------------- +# Fix #2: empty dict body must be logged, not silently dropped +# --------------------------------------------------------------------------- + + +def test_http_client_logs_empty_dict_body(tmp_path): + """An empty JSON body {} is falsy but must still be logged (not skipped via `or`).""" + from unittest.mock import MagicMock + + from PowerPlatform.Dataverse.core._http import _HttpClient + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {} + mock_resp.text = "" + + session = MagicMock() + session.request.return_value = mock_resp + + cfg = LogConfig(log_folder=str(tmp_path), max_body_bytes=4096) + http_logger = _HttpLogger(cfg) + client = _HttpClient(session=session, logger=http_logger) + client._request("POST", "https://example.com/accounts", json={}) + + content = _read_log(tmp_path) + assert ">>> REQUEST" in content + assert "{}" in content + + +# --------------------------------------------------------------------------- +# Fix #3: resp.text not decoded when body logging disabled +# --------------------------------------------------------------------------- + + +def test_http_client_does_not_decode_response_body_when_logging_disabled(tmp_path): + """When max_body_bytes=0, resp.text must not be accessed (no unnecessary decoding).""" + from unittest.mock import MagicMock, PropertyMock + + from PowerPlatform.Dataverse.core._http import _HttpClient + + mock_resp = MagicMock() + mock_resp.status_code = 200 + mock_resp.headers = {} + # If resp.text is accessed, the test will fail + type(mock_resp).text = PropertyMock(side_effect=AssertionError("resp.text should not be accessed")) + + session = MagicMock() + session.request.return_value = mock_resp + + cfg = LogConfig(log_folder=str(tmp_path), max_body_bytes=0) + http_logger = _HttpLogger(cfg) + client = _HttpClient(session=session, logger=http_logger) + # Should not raise + client._request("GET", "https://example.com") + + +# --------------------------------------------------------------------------- +# Fix #4: _HttpLogger.close() releases file handle +# --------------------------------------------------------------------------- + + +def test_http_logger_close_releases_handler(tmp_path): + """close() flushes and removes the handler so the file handle is released.""" + logger = _make_logger(tmp_path) + logger.log_request("GET", "https://example.com") + logger.close() + # After close the internal logger should have no handlers + assert len(logger._logger.handlers) == 0 + + +def test_http_logger_close_is_idempotent(tmp_path): + """Calling close() twice must not raise.""" + logger = _make_logger(tmp_path) + logger.close() + logger.close() # should not raise + + +# --------------------------------------------------------------------------- +# Fix #5: filename uses microsecond precision (no collision) +# --------------------------------------------------------------------------- + + +def test_log_filenames_unique_for_rapid_creation(tmp_path): + """Two loggers created back-to-back get distinct filenames.""" + l1 = _make_logger(tmp_path) + l2 = _make_logger(tmp_path) + # Compare handler filepaths directly — avoids filesystem timing races where + # both loggers are created within the same microsecond. + path1 = l1._handler.baseFilename + path2 = l2._handler.baseFilename + l1.close() + l2.close() + assert path1 != path2 + + +# --------------------------------------------------------------------------- +# Fix #6: byte-correct truncation for Unicode bodies +# --------------------------------------------------------------------------- + + +def test_body_truncation_unicode_byte_accurate(tmp_path): + """Truncation respects byte budget even for multi-byte Unicode characters.""" + # Each '€' is 3 UTF-8 bytes; 10 bytes limit should cut within a few chars + logger = _make_logger(tmp_path, max_body_bytes=10) + body = "€" * 20 # 60 bytes total + logger.log_request("POST", "https://example.com", body=body) + content = _read_log(tmp_path) + assert "truncated" in content + assert "60 bytes total" in content + + +def test_body_truncation_reports_byte_count_not_char_count(tmp_path): + """The truncation message reports UTF-8 byte length, not character count.""" + # 5 chars × 3 bytes each = 15 bytes; limit 5 bytes → should report 15 bytes + logger = _make_logger(tmp_path, max_body_bytes=5) + body = "€€€€€" # 5 chars, 15 bytes + logger.log_request("POST", "https://example.com", body=body) + content = _read_log(tmp_path) + assert "15 bytes total" in content diff --git a/tests/unit/data/test_enum_optionset_payload.py b/tests/unit/data/test_enum_optionset_payload.py index 23925991..a6efdf5a 100644 --- a/tests/unit/data/test_enum_optionset_payload.py +++ b/tests/unit/data/test_enum_optionset_payload.py @@ -24,6 +24,7 @@ def __init__(self, language_code=1033): self.http_retries = 0 self.http_backoff = 0 self.http_timeout = 5 + self.log_config = None def _make_client(lang=1033):