From 4a6924e3bc2bdf0acdb9abdc0b655c9511462501 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gonzalo=20Pe=C3=B1a-Castellanos?= Date: Mon, 4 May 2026 17:55:44 -0500 Subject: [PATCH] feat(services): add S3CompatibleImageFileStorage (first cloud impl of ImageFileStorageBase) --- docs-old/configuration/object-storage.md | 182 +++++++++++++ .../docs/configuration/object-storage.mdx | 185 +++++++++++++ invokeai/app/api/dependencies.py | 11 +- .../app/services/config/config_default.py | 5 + .../services/image_files/image_files_s3.py | 255 ++++++++++++++++++ mkdocs.yml | 4 +- .../image_files/test_image_files_s3.py | 167 ++++++++++++ tests/test_config.py | 23 ++ 8 files changed, 830 insertions(+), 2 deletions(-) create mode 100644 docs-old/configuration/object-storage.md create mode 100644 docs/src/content/docs/configuration/object-storage.mdx create mode 100644 invokeai/app/services/image_files/image_files_s3.py create mode 100644 tests/app/services/image_files/test_image_files_s3.py diff --git a/docs-old/configuration/object-storage.md b/docs-old/configuration/object-storage.md new file mode 100644 index 00000000000..78b1297ae0a --- /dev/null +++ b/docs-old/configuration/object-storage.md @@ -0,0 +1,182 @@ +--- +title: Object Storage (S3-Compatible) +--- + +# Object Storage (S3-Compatible) + +InvokeAI can store generated images in any S3-compatible object store +instead of the local filesystem. This is intended for **multi-user and +hosted Invoke deployments** where the application server is ephemeral +(containers, autoscaled instances, Kubernetes pods) and image +artifacts need to live somewhere durable and shared across replicas. + +The implementation lives in +`invokeai/app/services/image_files/image_files_s3.py` and works with +AWS S3, [Backblaze B2](https://www.backblaze.com/cloud-storage), and +any other provider that speaks the S3 API. + +!!! 
note "Image files only — latents and presigned-URL delivery are follow-ups" + This release covers image files (`ImageFileStorageBase`). Latents + serialization (`ObjectSerializerBase`) and presigned-URL frontend + delivery (`UrlServiceBase`) still use the disk backend; both are + tracked as separate follow-up PRs. + +## When to use object storage + +Use the S3 backend when **any** of these apply: + +- You're running InvokeAI behind a load balancer with more than one + replica — they need to share a single image gallery. +- Your application servers are ephemeral and a local volume would not + persist across restarts. +- You want to back up, version, or apply lifecycle rules to generated + images using your object-store provider's tooling. +- You're hosting a multi-tenant Invoke instance and want to keep + artifacts off the application server entirely. + +For a single-user desktop install, **stay on the disk backend** — +it's simpler, faster, and benefits from an in-process LRU cache. + +## Selecting the backend + +The backend is selected by an environment variable (or the equivalent +`invokeai.yaml` setting in a future release): + +```sh +# Default; uses local filesystem under the InvokeAI root +INVOKEAI_STORAGE_BACKEND=disk + +# Use any S3-compatible bucket +INVOKEAI_STORAGE_BACKEND=s3 +``` + +When `s3` is selected, the variables in the next section are required. + +## Required configuration + +| Variable | Required | Description | +| -------------------------- | :------: | -------------------------------------------------------------------------------------------- | +| `INVOKEAI_S3_BUCKET` | yes | Bucket / container name. Must already exist; InvokeAI does not create it. | +| `INVOKEAI_S3_ENDPOINT_URL` | *see* | Provider endpoint. Required for non-AWS providers (e.g. B2's `https://s3.us-west-004.backblazeb2.com`). Omit to talk to AWS S3. | +| `INVOKEAI_S3_REGION` | no | Region name; defaults to `us-east-1`. Some providers ignore it but boto3 still needs a value. 
| + +### Credentials + +Credentials follow boto3's standard chain: `AWS_ACCESS_KEY_ID` and +`AWS_SECRET_ACCESS_KEY`, instance profiles, IRSA, +`~/.aws/credentials`, etc. For Backblaze deployments there's a +convenience mapping — see below. + +## Backblaze B2 + +InvokeAI honors the standard B2 environment variable names so you can +use the same credentials you'd give any other B2 tool. If +`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` are unset and these are +present, they are mapped onto the AWS names when the boto3 client is +constructed (the process environment is **not** mutated). + +```sh +INVOKEAI_STORAGE_BACKEND=s3 +INVOKEAI_S3_BUCKET=my-invokeai-images +INVOKEAI_S3_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com +INVOKEAI_S3_REGION=us-west-004 + +B2_APPLICATION_KEY_ID=000xxxxxxxxxxxxxxxxxxxxx +B2_APPLICATION_KEY=K000xxxxxxxxxxxxxxxxxxxxxxxxxxx +``` + +The endpoint shown above (`s3.us-west-004.backblazeb2.com`) is the +default for B2 buckets in the `us-west-004` region — replace +`us-west-004` with whichever region your bucket lives in. You can +find the exact endpoint on the bucket's detail page in the B2 +console. + +!!! tip "Application keys, not master keys" + Create a B2 **application key** scoped to just the bucket + InvokeAI will use. This limits blast radius if the credentials + leak. + +## Other providers + +=== "AWS S3" + + ```sh + INVOKEAI_STORAGE_BACKEND=s3 + INVOKEAI_S3_BUCKET=my-invokeai-images + INVOKEAI_S3_REGION=us-east-1 + # Endpoint URL omitted: boto3 talks to AWS by default. + # Credentials via instance profile, IRSA, or AWS_ACCESS_KEY_ID/SECRET. + ``` + +=== "Backblaze B2" + + ```sh + INVOKEAI_STORAGE_BACKEND=s3 + INVOKEAI_S3_BUCKET=my-invokeai-images + INVOKEAI_S3_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com + INVOKEAI_S3_REGION=us-west-004 + B2_APPLICATION_KEY_ID=... + B2_APPLICATION_KEY=... 
+ ``` + +Other S3-compatible providers work the same way: set +`INVOKEAI_S3_ENDPOINT_URL` to their endpoint and supply the +appropriate `AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`. + +## Object layout + +Inside the bucket, InvokeAI uses two prefixes that mirror the on-disk +layout so the database's image-name references continue to resolve +unambiguously: + +``` +<bucket>/ + images/<name>.png + thumbnails/<name>.webp +``` + +InvokeAI also writes per-object user-metadata +(`invokeai-metadata`, `invokeai-workflow`, `invokeai-graph`) so +workflow and graph lookups can be served by a cheap `HEAD` request +without downloading the full PNG. + +## Performance trade-offs + +!!! note "Known trade-off: first-byte latency" + The disk backend keeps recently-accessed images in an in-process + LRU cache, so re-reads are essentially free. The S3 backend is + currently **stateless** — every read goes to the object store, + which means first-byte latency is meaningfully higher than disk, + especially for gallery scrolls that touch many thumbnails. + + This is by design for the initial release: it keeps the + implementation small and avoids cache-coherency bugs across + replicas. A read-through local cache is a planned follow-up, + gated on profiling. + +In practice this is not a problem for most multi-user deployments — +the frontend already paginates the gallery and modern object stores +are fast enough to keep the UI responsive. Single-user desktop +installs should not switch to S3 just for the architectural symmetry; +the disk backend is faster for that case. + +## Operational notes + +- **Bucket pre-creation.** The bucket must exist before InvokeAI + starts. The application does not create it and will fail loudly on + first write if the bucket is missing. +- **Credential rotation.** Restart the Invoke process to pick up new + credentials — the boto3 client is built once at startup. +- **Versioned buckets (B2).** B2 buckets retain prior object versions + by default. 
Use a lifecycle rule to age out hidden versions if you + don't want deleted images to linger. +- **Server-side encryption / object lock.** Configure these on the + bucket itself; InvokeAI passes uploads through unchanged. + +## Follow-ups + +The current scaffold covers image files only. Latents serialization +and presigned-URL frontend delivery (so browsers fetch directly from +the bucket) are tracked as separate follow-ups; until those land, +deployments using the S3 image backend will still have the +application server proxy image bytes to the browser. diff --git a/docs/src/content/docs/configuration/object-storage.mdx b/docs/src/content/docs/configuration/object-storage.mdx new file mode 100644 index 00000000000..f0d751bc810 --- /dev/null +++ b/docs/src/content/docs/configuration/object-storage.mdx @@ -0,0 +1,185 @@ +--- +title: Object Storage (S3-Compatible) +sidebar: + order: 4 +--- + +import { Aside, Tabs, TabItem } from '@astrojs/starlight/components' + +InvokeAI can store generated images in any S3-compatible object store +instead of the local filesystem. This is intended for **multi-user and +hosted Invoke deployments** where the application server is ephemeral +(containers, autoscaled instances, Kubernetes pods) and image artifacts +need to live somewhere durable and shared across replicas. + +The implementation lives in +`invokeai/app/services/image_files/image_files_s3.py` and works with +AWS S3, [Backblaze B2](https://www.backblaze.com/cloud-storage), and +any other provider that speaks the S3 API. + + + +## When to use object storage + +Use the S3 backend when **any** of these apply: + +- You're running InvokeAI behind a load balancer with more than one + replica — they need to share a single image gallery. +- Your application servers are ephemeral and a local volume would not + persist across restarts. +- You want to back up, version, or apply lifecycle rules to generated + images using your object-store provider's tooling. 
+- You're hosting a multi-tenant Invoke instance and want to keep + artifacts off the application server entirely. + +For a single-user desktop install, **stay on the disk backend** — it's +simpler, faster, and benefits from an in-process LRU cache. + +## Selecting the backend + +The backend is selected by an environment variable (or the equivalent +`invokeai.yaml` setting in a future release): + +```sh +# Default; uses local filesystem under the InvokeAI root +INVOKEAI_STORAGE_BACKEND=disk + +# Use any S3-compatible bucket +INVOKEAI_STORAGE_BACKEND=s3 +``` + +When `s3` is selected, the variables in the next section are required. + +## Required configuration + +| Variable | Required | Description | +| ----------------------------- | :------: | -------------------------------------------------------------------------------------------- | +| `INVOKEAI_S3_BUCKET` | yes | Bucket / container name. Must already exist; InvokeAI does not create it. | +| `INVOKEAI_S3_ENDPOINT_URL` | *see* | Provider endpoint. Required for non-AWS providers (e.g. B2's `https://s3.us-west-004.backblazeb2.com`). Omit to talk to AWS S3. | +| `INVOKEAI_S3_REGION` | no | Region name; defaults to `us-east-1`. Some providers ignore it but boto3 still needs a value. | + +### Credentials + +Credentials follow boto3's standard chain: `AWS_ACCESS_KEY_ID` and +`AWS_SECRET_ACCESS_KEY`, instance profiles, IRSA, `~/.aws/credentials`, +etc. For Backblaze deployments there's a convenience mapping — see +below. + +## Backblaze B2 + +InvokeAI honors the standard B2 environment variable names so you can +use the same credentials you'd give any other B2 tool. If +`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY` are unset and these are +present, they are mapped onto the AWS names when the boto3 client is +constructed (the process environment is **not** mutated). 
+ +```sh +INVOKEAI_STORAGE_BACKEND=s3 +INVOKEAI_S3_BUCKET=my-invokeai-images +INVOKEAI_S3_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com +INVOKEAI_S3_REGION=us-west-004 + +B2_APPLICATION_KEY_ID=000xxxxxxxxxxxxxxxxxxxxx +B2_APPLICATION_KEY=K000xxxxxxxxxxxxxxxxxxxxxxxxxxx +``` + +The endpoint shown above (`s3.us-west-004.backblazeb2.com`) is the +default for B2 buckets in the `us-west-004` region — replace +`us-west-004` with whichever region your bucket lives in. You can find +the exact endpoint on the bucket's detail page in the B2 console. + +<Aside type="tip" title="Application keys, not master keys"> + Create a B2 **application key** scoped to just the bucket InvokeAI + will use. This limits blast radius if the credentials leak. +</Aside> + +## Other providers + +<Tabs> +<TabItem label="AWS S3"> +```sh +INVOKEAI_STORAGE_BACKEND=s3 +INVOKEAI_S3_BUCKET=my-invokeai-images +INVOKEAI_S3_REGION=us-east-1 +# Endpoint URL omitted: boto3 talks to AWS by default. +# Credentials via instance profile, IRSA, or AWS_ACCESS_KEY_ID/SECRET. +``` +</TabItem> +<TabItem label="Backblaze B2"> +```sh +INVOKEAI_STORAGE_BACKEND=s3 +INVOKEAI_S3_BUCKET=my-invokeai-images +INVOKEAI_S3_ENDPOINT_URL=https://s3.us-west-004.backblazeb2.com +INVOKEAI_S3_REGION=us-west-004 +B2_APPLICATION_KEY_ID=... +B2_APPLICATION_KEY=... +``` +</TabItem> +</Tabs> + +Other S3-compatible providers work the same way: set +`INVOKEAI_S3_ENDPOINT_URL` to their endpoint and supply the appropriate +`AWS_ACCESS_KEY_ID` / `AWS_SECRET_ACCESS_KEY`. + +## Object layout + +Inside the bucket, InvokeAI uses two prefixes that mirror the on-disk +layout so the database's image-name references continue to resolve +unambiguously: + +``` +<bucket>/ + images/<name>.png + thumbnails/<name>.webp +``` + +InvokeAI also writes per-object user-metadata (`invokeai-metadata`, +`invokeai-workflow`, `invokeai-graph`) so workflow and graph lookups +can be served by a cheap `HEAD` request without downloading the full +PNG. + +## Performance trade-offs + +<Aside type="note" title="Known trade-off: first-byte latency"> + The disk backend keeps recently-accessed images in an in-process LRU + cache, so re-reads are essentially free. The S3 backend is currently + **stateless** — every read goes to the object store, which means + first-byte latency is meaningfully higher than disk, especially for + gallery scrolls that touch many thumbnails. + + This is by design for the initial release: it keeps the + implementation small and avoids cache-coherency bugs across + replicas. A read-through local cache is a planned follow-up, gated on + profiling. +</Aside> + +In practice this is not a problem for most multi-user deployments — +the frontend already paginates the gallery and modern object stores +are fast enough to keep the UI responsive. 
Single-user desktop +installs should not switch to S3 just for the architectural symmetry; +the disk backend is faster for that case. + +## Operational notes + +- **Bucket pre-creation.** The bucket must exist before InvokeAI + starts. The application does not create it and will fail loudly on + first write if the bucket is missing. +- **Credential rotation.** Restart the Invoke process to pick up new + credentials — the boto3 client is built once at startup. +- **Versioned buckets (B2).** B2 buckets retain prior object versions + by default. Use a lifecycle rule to age out hidden versions if you + don't want deleted images to linger. +- **Server-side encryption / object lock.** Configure these on the + bucket itself; InvokeAI passes uploads through unchanged. + +## Follow-ups + +The current scaffold covers image files only. Latents serialization +and presigned-URL frontend delivery (so browsers fetch directly from +the bucket) are tracked as separate follow-ups; until those land, +deployments using the S3 image backend will still have the application +server proxy image bytes to the browser. 
diff --git a/invokeai/app/api/dependencies.py b/invokeai/app/api/dependencies.py index ff55749f6b1..17e641e7fee 100644 --- a/invokeai/app/api/dependencies.py +++ b/invokeai/app/api/dependencies.py @@ -19,7 +19,9 @@ from invokeai.app.services.external_generation.external_generation_default import ExternalGenerationService from invokeai.app.services.external_generation.providers import GeminiProvider, OpenAIProvider from invokeai.app.services.external_generation.startup import sync_configured_external_starter_models +from invokeai.app.services.image_files.image_files_base import ImageFileStorageBase from invokeai.app.services.image_files.image_files_disk import DiskImageFileStorage +from invokeai.app.services.image_files.image_files_s3 import S3CompatibleImageFileStorage from invokeai.app.services.image_records.image_records_sqlite import SqliteImageRecordStorage from invokeai.app.services.images.images_default import ImageService from invokeai.app.services.invocation_cache.invocation_cache_memory import MemoryInvocationCache @@ -101,7 +103,14 @@ def initialize( if output_folder is None: raise ValueError("Output folder is not set") - image_files = DiskImageFileStorage(f"{output_folder}/images") + image_files: ImageFileStorageBase + if config.storage_backend == "s3": + image_files = S3CompatibleImageFileStorage( + bucket=config.s3_bucket, + endpoint_url=config.s3_endpoint_url, + ) + else: + image_files = DiskImageFileStorage(f"{output_folder}/images") model_images_folder = config.models_path style_presets_folder = config.style_presets_path diff --git a/invokeai/app/services/config/config_default.py b/invokeai/app/services/config/config_default.py index 729eb1332c0..d8d59bb2051 100644 --- a/invokeai/app/services/config/config_default.py +++ b/invokeai/app/services/config/config_default.py @@ -161,6 +161,11 @@ class InvokeAIAppConfig(BaseSettings): style_presets_dir: Path = Field(default=Path("style_presets"), description="Path to directory for style presets.") 
workflow_thumbnails_dir: Path = Field(default=Path("workflow_thumbnails"), description="Path to directory for workflow thumbnails.") + # STORAGE + storage_backend: Literal["disk", "s3"] = Field(default="disk", description='Backend for storing generated images. "disk" uses the local filesystem; "s3" uses any S3-compatible object store (AWS S3, Backblaze B2, etc.).') + s3_bucket: Optional[str] = Field(default=None, description='Bucket name for the s3 storage backend. Required when storage_backend="s3".') + s3_endpoint_url: Optional[str] = Field(default=None, description='Endpoint URL for the s3 storage backend. Leave unset to talk to AWS S3; set to a provider-specific URL (e.g. https://s3.us-west-004.backblazeb2.com for Backblaze B2) for any other S3-compatible store.') + # LOGGING log_handlers: list[str] = Field(default=["console"], description='Log handler. Valid options are "console", "file=", "syslog=path|address:host:port", "http=".') # note - would be better to read the log_format values from logging.py, but this creates circular dependencies issues diff --git a/invokeai/app/services/image_files/image_files_s3.py b/invokeai/app/services/image_files/image_files_s3.py new file mode 100644 index 00000000000..c6b0f4397b2 --- /dev/null +++ b/invokeai/app/services/image_files/image_files_s3.py @@ -0,0 +1,255 @@ +# Copyright (c) 2026 The InvokeAI Team +import io +import json +import os +from pathlib import Path +from typing import TYPE_CHECKING, Any, Optional + +from PIL import Image, PngImagePlugin +from PIL.Image import Image as PILImageType + +from invokeai.app.services.image_files.image_files_base import ImageFileStorageBase +from invokeai.app.services.image_files.image_files_common import ( + ImageFileDeleteException, + ImageFileNotFoundException, + ImageFileSaveException, +) +from invokeai.app.util.thumbnails import get_thumbnail_name, make_thumbnail + +if TYPE_CHECKING: + from invokeai.app.services.invoker import Invoker + + +_IMAGES_PREFIX = "images/" 
_THUMBNAILS_PREFIX = "thumbnails/"
_USER_AGENT_SUFFIX = "invokeai"
_META_INVOKEAI_METADATA = "invokeai-metadata"
_META_INVOKEAI_WORKFLOW = "invokeai-workflow"
_META_INVOKEAI_GRAPH = "invokeai-graph"

# S3 limits user-defined metadata to 2 KB per object. The budget below is
# applied conservatively: every entry is charged for its header prefix, its key
# and its value.
_MAX_USER_METADATA_BYTES = 2048
_USER_METADATA_HEADER_PREFIX = "x-amz-meta-"

# Error codes that mean "the object does not exist".
_NOT_FOUND_CODES = ("NoSuchKey", "404", "NotFound")


class S3CompatibleImageFileStorage(ImageFileStorageBase):
    """Stores images in any S3-compatible bucket.

    Full-size images are uploaded as PNG under the ``images/`` prefix and
    thumbnails as WEBP under the ``thumbnails/`` prefix. Metadata, workflow and
    graph text is always embedded in the PNG; when it is small enough it is
    additionally attached as S3 user metadata so it can be read back with a
    ``HEAD`` request instead of a full download.
    """

    def __init__(
        self,
        bucket: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        region_name: Optional[str] = None,
        client: Optional[Any] = None,
    ) -> None:
        """Resolve the configuration and build (or adopt) the S3 client.

        Args:
            bucket: Bucket name. Falls back to ``INVOKEAI_S3_BUCKET``.
            endpoint_url: Provider endpoint. Falls back to
                ``INVOKEAI_S3_ENDPOINT_URL``; when neither is set no endpoint
                is passed to boto3.
            region_name: Region. Falls back to ``INVOKEAI_S3_REGION`` and then
                to ``"us-east-1"``.
            client: Pre-built client to use instead of creating one (the tests
                inject a fake this way).

        Raises:
            ImportError: boto3 is not installed.
            ValueError: No bucket name was supplied or found in the environment.
        """
        try:
            import boto3  # noqa: F401
            from botocore.config import Config as BotoConfig
        except ImportError as e:  # pragma: no cover
            raise ImportError(
                "boto3 is required for S3CompatibleImageFileStorage. Install it with `pip install boto3`."
            ) from e

        self._bucket = bucket or os.environ.get("INVOKEAI_S3_BUCKET")
        if not self._bucket:
            raise ValueError(
                "S3CompatibleImageFileStorage requires a bucket name (pass `bucket=` or set INVOKEAI_S3_BUCKET)."
            )

        self._endpoint_url = endpoint_url or os.environ.get("INVOKEAI_S3_ENDPOINT_URL")
        self._region_name = region_name or os.environ.get("INVOKEAI_S3_REGION") or "us-east-1"
        self._client = client if client is not None else self._build_client(BotoConfig)

    def _build_client(self, BotoConfig: Any) -> Any:
        """Create the boto3 S3 client from the resolved settings.

        Credentials are left to boto3's own lookup whenever the standard AWS
        key pair is present in the environment, so companion settings such as
        ``AWS_SESSION_TOKEN`` keep working. Only when that pair is not fully
        set and a complete ``B2_APPLICATION_KEY_ID`` / ``B2_APPLICATION_KEY``
        pair exists is the B2 pair passed explicitly. ``os.environ`` is never
        modified.
        """
        import boto3

        boto_config = BotoConfig(
            user_agent_extra=_USER_AGENT_SUFFIX,
            signature_version="s3v4",
            retries={"max_attempts": 3, "mode": "standard"},
        )

        kwargs: dict[str, Any] = {
            "service_name": "s3",
            "region_name": self._region_name,
            "config": boto_config,
        }
        if self._endpoint_url:
            kwargs["endpoint_url"] = self._endpoint_url

        env = os.environ
        aws_pair_present = bool(env.get("AWS_ACCESS_KEY_ID") and env.get("AWS_SECRET_ACCESS_KEY"))
        b2_key_id = env.get("B2_APPLICATION_KEY_ID")
        b2_key = env.get("B2_APPLICATION_KEY")
        # Take the key id and the secret from the same source: never pair an
        # AWS key id with a B2 secret (or the reverse).
        if not aws_pair_present and b2_key_id and b2_key:
            kwargs["aws_access_key_id"] = b2_key_id
            kwargs["aws_secret_access_key"] = b2_key

        return boto3.client(**kwargs)

    @staticmethod
    def _object_key(image_name: str, thumbnail: bool = False) -> str:
        """Map an image name to its object key.

        Raises:
            ValueError: ``image_name`` contains a directory component.
        """
        basename = Path(image_name).name
        if basename != image_name:
            raise ValueError("Invalid image name, potential directory traversal detected")
        if thumbnail:
            return f"{_THUMBNAILS_PREFIX}{get_thumbnail_name(basename)}"
        return f"{_IMAGES_PREFIX}{basename}"

    @staticmethod
    def _is_not_found(error: Any) -> bool:
        """Return True when a botocore ``ClientError`` reports a missing object."""
        code = error.response.get("Error", {}).get("Code", "")
        return code in _NOT_FOUND_CODES

    @staticmethod
    def _build_object_metadata(
        metadata: Optional[str],
        workflow: Optional[str],
        graph: Optional[str],
    ) -> dict[str, str]:
        """Choose which text values can be attached as S3 user metadata.

        Values are added while the running total stays within
        ``_MAX_USER_METADATA_BYTES``; a value that would exceed the budget is
        skipped. Workflow and graph are considered first because they are the
        only entries this class reads back from ``HEAD`` responses.
        """
        candidates = (
            (_META_INVOKEAI_WORKFLOW, workflow),
            (_META_INVOKEAI_GRAPH, graph),
            (_META_INVOKEAI_METADATA, metadata),
        )
        selected: dict[str, str] = {}
        used = 0
        for meta_key, raw in candidates:
            if raw is None:
                continue
            encoded = _safe_meta(raw)
            cost = len(_USER_METADATA_HEADER_PREFIX) + len(meta_key) + len(encoded.encode("utf-8"))
            if used + cost > _MAX_USER_METADATA_BYTES:
                continue
            selected[meta_key] = encoded
            used += cost
        return selected

    def start(self, invoker: "Invoker") -> None:
        """Keep a reference to the invoker passed in at service start-up."""
        self.__invoker = invoker

    def get(self, image_name: str) -> PILImageType:
        """Download the full-size image and return it fully decoded.

        Raises:
            ImageFileNotFoundException: The object does not exist.
            ValueError: ``image_name`` contains a directory component.
        """
        from botocore.exceptions import ClientError

        key = self._object_key(image_name)
        try:
            response = self._client.get_object(Bucket=self._bucket, Key=key)
        except ClientError as e:
            if self._is_not_found(e):
                raise ImageFileNotFoundException from e
            raise

        body = response["Body"].read()
        image = Image.open(io.BytesIO(body))
        # Decode now: the in-memory buffer is not kept around afterwards.
        image.load()
        return image

    def save(
        self,
        image: PILImageType,
        image_name: str,
        metadata: Optional[str] = None,
        workflow: Optional[str] = None,
        graph: Optional[str] = None,
        thumbnail_size: int = 256,
    ) -> None:
        """Upload the image as PNG together with a WEBP thumbnail.

        The provided metadata, workflow and graph strings are embedded as PNG
        text chunks and replace ``image.info``. Values that fit within the S3
        user-metadata budget are also attached to the PNG object.

        Raises:
            ImageFileSaveException: Any step failed, including an invalid
                ``image_name``.
        """
        try:
            key = self._object_key(image_name)
            thumb_key = self._object_key(image_name, thumbnail=True)

            pnginfo = PngImagePlugin.PngInfo()
            info_dict: dict[str, str] = {}
            embedded = (
                ("invokeai_metadata", metadata),
                ("invokeai_workflow", workflow),
                ("invokeai_graph", graph),
            )
            for png_key, value in embedded:
                if value is not None:
                    info_dict[png_key] = value
                    pnginfo.add_text(png_key, value)

            image.info = info_dict

            # Encode both payloads before the first upload so an encoding
            # failure cannot leave a full-size object without its thumbnail.
            png_buffer = io.BytesIO()
            image.save(png_buffer, format="PNG", pnginfo=pnginfo)

            thumbnail_image = make_thumbnail(image, thumbnail_size)
            thumb_buffer = io.BytesIO()
            thumbnail_image.save(thumb_buffer, format="WEBP")

            put_kwargs: dict[str, Any] = {
                "Bucket": self._bucket,
                "Key": key,
                "Body": png_buffer.getvalue(),
                "ContentType": "image/png",
            }
            obj_metadata = self._build_object_metadata(metadata, workflow, graph)
            if obj_metadata:
                put_kwargs["Metadata"] = obj_metadata
            self._client.put_object(**put_kwargs)

            self._client.put_object(
                Bucket=self._bucket,
                Key=thumb_key,
                Body=thumb_buffer.getvalue(),
                ContentType="image/webp",
            )
        except Exception as e:
            raise ImageFileSaveException from e

    def delete(self, image_name: str) -> None:
        """Delete the full-size object and its thumbnail.

        Raises:
            ImageFileDeleteException: Any step failed, including an invalid
                ``image_name``.
        """
        try:
            key = self._object_key(image_name)
            thumb_key = self._object_key(image_name, thumbnail=True)
            self._client.delete_object(Bucket=self._bucket, Key=key)
            self._client.delete_object(Bucket=self._bucket, Key=thumb_key)
        except Exception as e:
            raise ImageFileDeleteException from e

    def get_path(self, image_name: str, thumbnail: bool = False) -> Path:
        """Return a synthetic ``s3://<bucket>/<key>`` location wrapped in a ``Path``.

        Callers needing a real filesystem path should migrate to a
        presigned-URL service. Because the value is a ``Path``, pathlib
        collapses the double slash, so ``str()`` of the result reads
        ``s3:/<bucket>/<key>``; ``validate_path`` accepts both spellings.
        """
        key = self._object_key(image_name, thumbnail=thumbnail)
        return Path(f"s3://{self._bucket}/{key}")

    def validate_path(self, path: str | Path) -> bool:
        """Validates the path given for an image or thumbnail.

        Accepts a bare object key, an ``s3://<bucket>/<key>`` URI, or the
        ``Path`` returned by ``get_path`` (whose string form has a single slash
        after ``s3:``).

        Returns:
            True when a ``HEAD`` request finds the object, False when the store
            reports it missing. Any other client error is re-raised.
        """
        from botocore.exceptions import ClientError

        text = path.as_posix() if isinstance(path, Path) else path
        key = text
        for prefix in (f"s3://{self._bucket}/", f"s3:/{self._bucket}/"):
            if text.startswith(prefix):
                key = text[len(prefix) :]
                break

        try:
            self._client.head_object(Bucket=self._bucket, Key=key)
            return True
        except ClientError as e:
            if self._is_not_found(e):
                return False
            raise

    def get_workflow(self, image_name: str) -> str | None:
        """Return the workflow text stored with the image, or None."""
        return self._get_text_metadata(image_name, _META_INVOKEAI_WORKFLOW, "invokeai_workflow")

    def get_graph(self, image_name: str) -> str | None:
        """Return the graph text stored with the image, or None."""
        return self._get_text_metadata(image_name, _META_INVOKEAI_GRAPH, "invokeai_graph")

    def _get_text_metadata(self, image_name: str, s3_meta_key: str, png_info_key: str) -> str | None:
        """Read one text value, preferring object metadata over the PNG itself.

        A ``HEAD`` request is tried first. When the object carries no non-empty
        value under ``s3_meta_key``, the image is downloaded and
        ``png_info_key`` is looked up in its PNG info instead.

        Raises:
            ImageFileNotFoundException: The object does not exist.
        """
        from botocore.exceptions import ClientError

        key = self._object_key(image_name)
        try:
            head = self._client.head_object(Bucket=self._bucket, Key=key)
        except ClientError as e:
            if self._is_not_found(e):
                raise ImageFileNotFoundException from e
            raise

        # boto3 lower-cases user-metadata keys.
        meta = head.get("Metadata", {}) or {}
        value = meta.get(s3_meta_key)
        if isinstance(value, str) and value:
            return _unsafe_meta(value)

        image = self.get(image_name)
        png_value = image.info.get(png_info_key, None)
        if isinstance(png_value, str):
            return png_value
        return None
def _safe_meta(value: str) -> str:
    """Encode arbitrary text so it can travel as an S3 user-metadata value.

    User metadata is transmitted in HTTP headers, so a value is passed through
    unchanged only when it is printable ASCII with no leading or trailing
    whitespace and cannot be mistaken for the envelope below. Everything else
    is wrapped as ``{"__b64__": true, "v": "<hex>"}``, where ``v`` is the
    hex-encoded UTF-8 bytes of the text (the marker name is historical; the
    payload is hex, not base64).
    """
    looks_like_envelope = value.startswith("{") and "__b64__" in value
    header_safe = value.isascii() and value.isprintable() and value == value.strip()
    if header_safe and not looks_like_envelope:
        return value
    return json.dumps({"__b64__": True, "v": value.encode("utf-8").hex()})


def _unsafe_meta(value: str) -> str:
    """Inverse of `_safe_meta`.

    Values that are not a well-formed envelope are returned unchanged, so text
    stored without wrapping still reads back as-is.
    """
    if value.startswith("{") and "__b64__" in value:
        try:
            payload = json.loads(value)
            if isinstance(payload, dict) and payload.get("__b64__"):
                return bytes.fromhex(payload["v"]).decode("utf-8")
        except (ValueError, KeyError, TypeError):
            # Malformed JSON, a missing or non-string "v", invalid hex or
            # invalid UTF-8: treat the value as plain text.
            pass
    return value
from invokeai.app.services.image_files.image_files_s3 import S3CompatibleImageFileStorage


def _client_error(code: str) -> ClientError:
    """Build a botocore ClientError carrying the given error code."""
    return ClientError({"Error": {"Code": code, "Message": code}}, "TestOperation")


class _FakeS3Client:
    """In-memory stand-in for the boto3 S3 client; objects are keyed by Key only."""

    def __init__(self) -> None:
        # Key -> {"Body": bytes, "Metadata": dict, "ContentType": str | None}
        self.store: dict[str, dict[str, Any]] = {}

    def put_object(self, Bucket: str, Key: str, Body: bytes, **kwargs: Any) -> dict:
        """Record the object; the bucket name is ignored."""
        self.store[Key] = {
            "Body": Body,
            "Metadata": kwargs.get("Metadata", {}) or {},
            "ContentType": kwargs.get("ContentType"),
        }
        return {}

    def get_object(self, Bucket: str, Key: str) -> dict:
        """Return the stored object, or raise a "NoSuchKey" ClientError."""
        if Key not in self.store:
            raise _client_error("NoSuchKey")
        record = self.store[Key]
        return {"Body": _BodyStream(record["Body"]), "Metadata": record["Metadata"]}

    def head_object(self, Bucket: str, Key: str) -> dict:
        """Return only the stored metadata, or raise a "404" ClientError."""
        if Key not in self.store:
            raise _client_error("404")
        return {"Metadata": self.store[Key]["Metadata"]}

    def delete_object(self, Bucket: str, Key: str) -> dict:
        """Remove the object if present; a missing key is not an error."""
        self.store.pop(Key, None)
        return {}


class _BodyStream:
    """Minimal response body exposing only ``read()``."""

    def __init__(self, data: bytes) -> None:
        self._data = data

    def read(self) -> bytes:
        return self._data


def _make_storage() -> tuple[S3CompatibleImageFileStorage, _FakeS3Client]:
    """Create a storage instance backed by a fresh fake client."""
    fake = _FakeS3Client()
    storage = S3CompatibleImageFileStorage(bucket="test-bucket", client=fake)
    return storage, fake


def _solid_png() -> Image.Image:
    """Return an 8x8 solid red RGB image."""
    return Image.new("RGB", (8, 8), color=(255, 0, 0))


def test_save_then_get_round_trips_image_bytes() -> None:
    """save() writes both keys and get() returns the same pixels."""
    storage, fake = _make_storage()
    storage.save(_solid_png(), "round-trip.png")

    assert "images/round-trip.png" in fake.store
    assert "thumbnails/round-trip.webp" in fake.store

    fetched = storage.get("round-trip.png")
    assert isinstance(fetched, Image.Image)
    assert fetched.size == (8, 8)
    assert fetched.getpixel((0, 0))[:3] == (255, 0, 0)


def test_get_missing_raises_image_file_not_found() -> None:
    """A missing object surfaces as ImageFileNotFoundException."""
    storage, _ = _make_storage()
    with pytest.raises(ImageFileNotFoundException):
        storage.get("does-not-exist.png")


def test_delete_removes_object_and_thumbnail() -> None:
    """delete() removes the image and its thumbnail."""
    storage, fake = _make_storage()
    storage.save(_solid_png(), "to-delete.png")
    assert "images/to-delete.png" in fake.store

    storage.delete("to-delete.png")
    assert "images/to-delete.png" not in fake.store
    assert "thumbnails/to-delete.webp" not in fake.store


def test_validate_path_for_present_and_missing_keys() -> None:
    """validate_path() accepts bare keys and s3:// URIs and reports missing keys."""
    storage, _ = _make_storage()
    storage.save(_solid_png(), "exists.png")

    assert storage.validate_path("images/exists.png") is True
    assert storage.validate_path("s3://test-bucket/images/exists.png") is True
    assert storage.validate_path("images/missing.png") is False


def test_get_path_returns_synthetic_s3_uri() -> None:
    """get_path() output names the bucket and the thumbnail key."""
    storage, _ = _make_storage()
    p = str(storage.get_path("foo.png", thumbnail=True))
    assert "test-bucket" in p and "thumbnails/foo.webp" in p


def test_get_workflow_and_graph_via_object_metadata() -> None:
    """Workflow and graph passed to save() are returned by the getters."""
    storage, _ = _make_storage()
    storage.save(_solid_png(), "meta.png", workflow="WF-DATA", graph="GRAPH-DATA")

    assert storage.get_workflow("meta.png") == "WF-DATA"
    assert storage.get_graph("meta.png") == "GRAPH-DATA"


def test_get_workflow_falls_back_to_png_pnginfo(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the object metadata wiped, the workflow is read from the PNG itself."""
    storage, fake = _make_storage()
    storage.save(_solid_png(), "fallback.png", workflow="FROM-PNG")
    # Drop the user metadata so only the embedded PNG text remains.
    fake.store["images/fallback.png"]["Metadata"] = {}

    assert storage.get_workflow("fallback.png") == "FROM-PNG"


def test_constructor_reads_invokeai_env_vars(monkeypatch: pytest.MonkeyPatch) -> None:
    """Bucket, endpoint and region are taken from INVOKEAI_S3_* when not passed."""
    monkeypatch.setenv("INVOKEAI_S3_BUCKET", "env-bucket")
    monkeypatch.setenv("INVOKEAI_S3_ENDPOINT_URL", "https://example.invalid")
    monkeypatch.setenv("INVOKEAI_S3_REGION", "us-west-2")

    captured: dict[str, Any] = {}

    def _fake_boto_client(**kwargs: Any) -> Any:
        captured.update(kwargs)
        return _FakeS3Client()

    with patch("boto3.client", side_effect=_fake_boto_client):
        storage = S3CompatibleImageFileStorage()

    assert storage._bucket == "env-bucket"
    assert captured["endpoint_url"] == "https://example.invalid"
    assert captured["region_name"] == "us-west-2"


def test_constructor_maps_b2_credentials_onto_aws(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the AWS variables unset, the B2_* pair is passed to boto3 as credentials."""
    monkeypatch.delenv("AWS_ACCESS_KEY_ID", raising=False)
    monkeypatch.delenv("AWS_SECRET_ACCESS_KEY", raising=False)
    monkeypatch.setenv("B2_APPLICATION_KEY_ID", "b2-id")
    monkeypatch.setenv("B2_APPLICATION_KEY", "b2-secret")

    captured: dict[str, Any] = {}

    def _fake_boto_client(**kwargs: Any) -> Any:
        captured.update(kwargs)
        return _FakeS3Client()

    with patch("boto3.client", side_effect=_fake_boto_client):
        S3CompatibleImageFileStorage(bucket="b2-bucket")

    assert captured["aws_access_key_id"] == "b2-id"
    assert captured["aws_secret_access_key"] == "b2-secret"


def test_constructor_requires_bucket(monkeypatch: pytest.MonkeyPatch) -> None:
    """Omitting the bucket (argument and env var) raises ValueError."""
    monkeypatch.delenv("INVOKEAI_S3_BUCKET", raising=False)
    with pytest.raises(ValueError, match="bucket"):
        S3CompatibleImageFileStorage(client=_FakeS3Client())


# diff --git a/tests/test_config.py b/tests/test_config.py
# index 960727b180e..55ca0b2e41e 100644
# @@ -332,3 +332,26 @@ def test_deny_nodes(patch_rootdir):
# Unchanged context lines of that hunk (tail of test_deny_nodes):
#     # Reset the config so that it doesn't affect other tests
#     get_config.cache_clear()
#     InvocationRegistry.invalidate_invocation_typeadapter()
# The three tests below are the lines this hunk appends to tests/test_config.py.


def test_storage_backend_defaults_to_disk(patch_rootdir: None):
    """A default config selects the disk backend and leaves the S3 fields unset."""
    config = InvokeAIAppConfig()
    assert config.storage_backend == "disk"
    assert config.s3_bucket is None
    assert config.s3_endpoint_url is None


def test_storage_backend_accepts_s3(patch_rootdir: None):
    """The S3 backend and its bucket/endpoint settings are stored as given."""
    config = InvokeAIAppConfig(
        storage_backend="s3",
        s3_bucket="my-bucket",
        s3_endpoint_url="https://s3.us-west-004.backblazeb2.com",
    )
    assert config.storage_backend == "s3"
    assert config.s3_bucket == "my-bucket"
    assert config.s3_endpoint_url == "https://s3.us-west-004.backblazeb2.com"


def test_storage_backend_rejects_unknown_value(patch_rootdir: None):
    """A backend name outside the allowed literals fails validation."""
    with pytest.raises(ValidationError):
        InvokeAIAppConfig(storage_backend="azure")