diff --git a/bench/ctable/bench_nested_parquet_roundtrip.py b/bench/ctable/bench_nested_parquet_roundtrip.py new file mode 100644 index 00000000..33b8030b --- /dev/null +++ b/bench/ctable/bench_nested_parquet_roundtrip.py @@ -0,0 +1,82 @@ +#!/usr/bin/env python + +from __future__ import annotations + +import argparse +import os +import shutil +import tempfile +import time +from pathlib import Path + +import pyarrow as pa +import pyarrow.parquet as pq + +import blosc2 + + +def _dir_size(path: Path) -> int: + total = 0 + for root, _, files in os.walk(path): + for f in files: + total += (Path(root) / f).stat().st_size + return total + + +def main() -> None: + p = argparse.ArgumentParser(description="Benchmark CTable nested Parquet roundtrip") + p.add_argument("parquet", help="Input Parquet file") + p.add_argument("--rows", type=int, default=0, help="Sample first N rows (0 = full file)") + p.add_argument("--keep", action="store_true", help="Keep temporary outputs") + args = p.parse_args() + + src = Path(args.parquet) + if not src.exists(): + raise FileNotFoundError(src) + + workdir = Path(tempfile.mkdtemp(prefix="b2-nested-bench-")) + sample_path = workdir / "sample.parquet" + out_b2d = workdir / "out.b2d" + out_parquet = workdir / "out.parquet" + + try: + input_path = src + if args.rows > 0: + pf = pq.ParquetFile(src) + batch = next(pf.iter_batches(batch_size=args.rows)) + table = pa.Table.from_batches([batch], schema=pf.schema_arrow) + pq.write_table(table, sample_path) + input_path = sample_path + + t0 = time.perf_counter() + t = blosc2.CTable.from_parquet(str(input_path)) + t1 = time.perf_counter() + + t.save(str(out_b2d), overwrite=True) + t2 = time.perf_counter() + + t.to_parquet(str(out_parquet)) + t3 = time.perf_counter() + + print("=== CTable nested Parquet roundtrip benchmark ===") + print(f"input: {input_path}") + print(f"rows: {t.nrows}") + print(f"columns: {len(t.col_names)}") + print(f"from_parquet (s): {t1 - t0:.3f}") + print(f"save b2d (s): {t2 - 
t1:.3f}") + print(f"to_parquet (s): {t3 - t2:.3f}") + print(f"input bytes: {input_path.stat().st_size}") + print(f"output parquet: {out_parquet.stat().st_size}") + print(f"output b2d bytes: {_dir_size(out_b2d)}") + print(f"workdir: {workdir}") + + if not args.keep: + shutil.rmtree(workdir) + except Exception: + if not args.keep: + shutil.rmtree(workdir, ignore_errors=True) + raise + + +if __name__ == "__main__": + main() diff --git a/bench/ctable/where-nulls.py b/bench/ctable/where-nulls.py new file mode 100644 index 00000000..5b47285d --- /dev/null +++ b/bench/ctable/where-nulls.py @@ -0,0 +1,118 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Create a persistent nullable CTable for where() benchmarks. + +Usage: + python bench/ctable/where-nulls.py table.b2d + python bench/ctable/where-nulls.py table.b2z +""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass +from pathlib import Path +from time import perf_counter + +import numpy as np + +import blosc2 + +NROWS = 500_000_000 +NULL_VALUE = 500 +RNG_SEED = 42 + + +@dataclass +class Row: + nrow: int = blosc2.field(blosc2.int64(ge=0)) + col1: int = blosc2.field(blosc2.int64(ge=0, le=1000, null_value=NULL_VALUE), default=None) + col2: int = blosc2.field(blosc2.int64(ge=0, le=1000, null_value=NULL_VALUE), default=None) + + +DTYPE = np.dtype( + [ + ("nrow", np.int64), + ("col1", np.int64), + ("col2", np.int64), + ] +) + + +def parse_args() -> argparse.Namespace: + parser = argparse.ArgumentParser(description=__doc__) + parser.add_argument( + "urlpath", + help="Output table path. 
Use a .b2d directory or a .b2z file extension.", + ) + return parser.parse_args() + + +def check_urlpath(urlpath: str) -> str: + suffix = Path(urlpath).suffix + if suffix not in {".b2d", ".b2z"}: + raise SystemExit("urlpath must end in .b2d (directory-backed) or .b2z (zip-backed)") + return suffix[1:] + + +def make_nullable_column(rng: np.random.Generator) -> np.ndarray: + # Normal distribution centered at 500, with practically all values in [0, 1000]. + return np.rint(rng.normal(loc=500, scale=50, size=NROWS)).clip(0, 1000).astype(np.int64) + + +def make_data() -> np.ndarray: + rng = np.random.default_rng(RNG_SEED) + data = np.empty(NROWS, dtype=DTYPE) + data["nrow"] = np.arange(NROWS, dtype=np.int64) + data["col1"] = make_nullable_column(rng) + data["col2"] = make_nullable_column(rng) + return data + + +def fmt_bytes(nbytes: int) -> str: + for unit in ("B", "KiB", "MiB", "GiB"): + if abs(nbytes) < 1024 or unit == "GiB": + return f"{nbytes:.2f} {unit}" if unit != "B" else f"{nbytes} {unit}" + nbytes /= 1024 + return f"{nbytes:.2f} GiB" + + +def main() -> None: + args = parse_args() + format_name = check_urlpath(args.urlpath) + + t0 = perf_counter() + data = make_data() + nulls_col1 = int(np.count_nonzero(data["col1"] == NULL_VALUE)) + nulls_col2 = int(np.count_nonzero(data["col2"] == NULL_VALUE)) + + table = blosc2.CTable(Row, urlpath=args.urlpath, mode="w", expected_size=NROWS, validate=False) + table.extend(data, validate=False) + elapsed = perf_counter() - t0 + + print("CTable nullable where() benchmark data created") + print("=" * 52) + print(f"urlpath: {args.urlpath}") + print(f"format: {format_name}") + print(f"rows: {len(table):,}") + print(f"columns: {', '.join(table.col_names)}") + print(f"null sentinel: {NULL_VALUE}") + print(f"col1 nulls: {nulls_col1:,}") + print(f"col2 nulls: {nulls_col2:,}") + print(f"uncompressed: {fmt_bytes(table.nbytes)}") + print(f"compressed: {fmt_bytes(table.cbytes)}") + print(f"compression: {table.cratio:.2f}x") + 
print(f"creation time: {elapsed:.3f} s") + print() + print(table) + + table.close() + + +if __name__ == "__main__": + main() diff --git a/bench/large-dict-store.py b/bench/large-dict-store.py new file mode 100644 index 00000000..c1524544 --- /dev/null +++ b/bench/large-dict-store.py @@ -0,0 +1,137 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# This source code is licensed under a BSD-style license (found in the +# LICENSE file in the root directory of this source tree) +####################################################################### +import os +import time +import numpy as np +import blosc2 +from blosc2 import DictStore +from memory_profiler import memory_usage + +def make_arrays(n, min_size, max_size, dtype="f8"): + sizes = np.linspace(min_size, max_size, n).astype(int) + #arrays = [blosc2.arange(size, dtype=dtype) for size in sizes] + arrays = [blosc2.linspace(0, 1, size, dtype=dtype) for size in sizes] + #arrays = [np.random.randint(0, 100, size=size, dtype=dtype) for size in sizes] + # Calculate uncompressed size + uncompressed_size = sum(arr.nbytes for arr in arrays) + print(f"Uncompressed data size: {uncompressed_size / 1e9:.2f} GB") + return arrays, sizes, uncompressed_size + +def get_file_size(filepath): + """Get file size in MB.""" + if os.path.exists(filepath): + return os.path.getsize(filepath) / 2**20 + return 0 + +def check_arrays(tree_path, arrays, prefix="node"): + print("Checking stored arrays...") + tree = DictStore(tree_path, mode="r") + for i, arr in enumerate(arrays): + stored_arr = tree[f"/{prefix}{i}"][:] + if not np.allclose(arr, stored_arr): + raise ValueError(f"Array mismatch at {prefix}{i}") + +def run_embed_tree(arrays, threshold, tree_path, uncompressed_size, check=False): + def embed_process(): + tree = DictStore(tree_path, mode="w", threshold=threshold) + for i, arr in enumerate(arrays): + tree[f"/node{i}"] = arr + 
tree.close() + + t0 = time.time() + mem_usage = memory_usage((embed_process, ()), interval=0.1) + t1 = time.time() + peak_mem = max(mem_usage) - min(mem_usage) + file_size = get_file_size(tree_path) + compression_ratio = uncompressed_size / (file_size * 2**20) if file_size > 0 else 0 + print(f"[Embed] Time: {t1-t0:.2f}s, Memory: {peak_mem:.2f} MB, File size: {file_size:.2f} MB," + f" Compression: {compression_ratio:.1f}x") + + if check: + check_arrays(tree_path, arrays, prefix="node") + + return t1-t0, peak_mem, file_size + +def run_external_tree(arrays, threshold, tree_path, arr_prefix, uncompressed_size, check=False): + def external_process(): + tree = DictStore(tree_path, mode="w", threshold=threshold) + for i, arr in enumerate(arrays): + arr_path = f"{arr_prefix}_node{i}.b2nd" + arr_b2 = blosc2.asarray(arr, urlpath=arr_path, mode="w") + tree[f"/node{i}"] = arr_b2 + tree.close() + + t0 = time.time() + mem_usage = memory_usage((external_process, ()), interval=0.1) + t1 = time.time() + peak_mem = max(mem_usage) - min(mem_usage) + file_size = get_file_size(tree_path) + total_external_size = sum(get_file_size(f"{arr_prefix}_node{i}.b2nd") for i in range(len(arrays))) + total_size_mb = (file_size + total_external_size) + compression_ratio = uncompressed_size / (total_size_mb * 2**20) if total_size_mb > 0 else 0 + print(f"[External] Time: {t1-t0:.2f}s, Memory: {peak_mem:.2f} MB, DictStore file size: {file_size:.2f} MB," + f" External files size: {total_external_size:.2f} MB, Total: {total_size_mb:.2f} MB," + f" Compression: {compression_ratio:.1f}x") + + if check: + check_arrays(tree_path, arrays, prefix="node") + + return t1-t0, peak_mem, file_size, total_external_size + +def cleanup_files(tree_path, arr_prefix, n): + if os.path.exists(tree_path): + os.remove(tree_path) + for i in range(n): + arr_path = f"{arr_prefix}_node{i}.b2nd" + if os.path.exists(arr_path): + os.remove(arr_path) + +if __name__ == "__main__": + N = 10 + min_size = int(1e6) # 1 MB + max_size = 
int(1e8) # 100 MB + threshold = 2**23 # 8 MB threshold before using external arrays + print(f"Creating {N} arrays with sizes ranging from {min_size / 1e6:.2f} to {max_size / 1e6:.2f} MB...") + arrays, sizes, uncompressed_size = make_arrays(N, min_size, max_size) + + print("Benchmarking DictStore with embed arrays...") + tree_path_embed = "large_dict_store_embed.b2z" + t_embed, mem_embed, file_size_embed = run_embed_tree(arrays, None, tree_path_embed, uncompressed_size) + + print("Benchmarking DictStore with external arrays with threshold...") + tree_path_external = "large_dict_store_external_threshold.b2z" + arr_prefix = "large_external" + t_t_external, mem_t_external, file_t_size_external, external_t_size = ( + run_external_tree(arrays, threshold, tree_path_external, arr_prefix, uncompressed_size)) + + print("Benchmarking DictStore with external arrays with no threshold...") + tree_path_external_noth = "large_dict_store_external_nothreshold.b2z" + arr_prefix = "large_external_noth" + t_external, mem_external, file_size_external, external_size = ( + run_external_tree(arrays, None, tree_path_external_noth, arr_prefix, uncompressed_size)) + + print("\nSummary:") + print(f"Embed arrays: Time = {t_embed:.2f}s, Memory = {mem_embed:.2f} MB," + f" File size = {file_size_embed:.2f} MB") + print(f"External arrays (th: {threshold / 2**20:.2f} MB): Time = {t_t_external:.2f}s, Memory = {mem_t_external:.2f} MB," + f" DictStore file size = {file_t_size_external:.2f} MB, External files size = {external_t_size:.2f} MB") + print(f"External arrays: Time = {t_external:.2f}s, Memory = {mem_external:.2f} MB," + f" DictStore file size = {file_size_external:.2f} MB, External files size = {external_size:.2f} MB") + + speedup = t_embed / t_external if t_external > 0 else float('inf') + mem_ratio = mem_embed / mem_external if mem_external > 0 else float('inf') + file_ratio = file_size_embed / file_size_external if file_size_external > 0 else float('inf') + storage_ratio = file_size_embed / 
(file_size_external) + print(f"Time ratio (embed/external): {speedup:.2f}x") + print(f"Memory ratio (embed/external): {mem_ratio:.2f}x") + print(f"File size ratio (embed/external tree): {file_ratio:.2f}x") + print(f"Storage efficiency (embed vs total external): {storage_ratio:.2f}x") + + # cleanup_files(tree_path_embed, arr_prefix, N) + # cleanup_files(tree_path_external, arr_prefix, N) + # cleanup_files(tree_path_external_noth, arr_prefix_noth, N) diff --git a/doc/reference/ctable.rst b/doc/reference/ctable.rst index 35b44312..12e99ea0 100644 --- a/doc/reference/ctable.rst +++ b/doc/reference/ctable.rst @@ -75,6 +75,19 @@ Construction .. automethod:: CTable.from_csv +Parquet interoperability +------------------------ + +Parquet import/export is intended as logical data interchange between Parquet +and Blosc2 CTable, not as exact preservation of Parquet's physical layout. For +example, Parquet files whose top-level schema is an unnamed ``list>`` +may be imported as a regular CTable whose rows are the list elements and whose +nested scalar fields are exposed as ordinary dotted columns. Exporting such a +table writes a valid logical Parquet table, but does not attempt to reconstruct +the original unnamed root-list grouping, row groups, encoding choices, or file +metadata exactly. + + Null policy ----------- @@ -615,12 +628,129 @@ to a typed representation. They are not used as an implicit fallback during Parquet import; unsupported Arrow/Parquet types still raise unless explicitly imported through :meth:`CTable.from_arrow` with ``object_fallback=True``. +Nested fields +------------- + +CTable supports first-class **nested struct schemas** by physically flattening +struct leaves into independent compressed columns. This keeps analytics fast +(each leaf is an ordinary :class:`~blosc2.NDArray`), while preserving the +logical nested row shape on read. 
+ +**Automatic flattening from Arrow / Parquet** + +When :meth:`CTable.from_arrow` or :meth:`CTable.from_parquet` encounters a +top-level ``struct<…>`` field, it recursively flattens every scalar leaf into a +dotted column name and stores each leaf as its own physical column:: + + import pyarrow as pa + import blosc2 + + trip_type = pa.struct([ + ("begin", pa.struct([("lon", pa.float64()), ("lat", pa.float64())])), + ("end", pa.struct([("lon", pa.float64()), ("lat", pa.float64())])), + ]) + schema = pa.schema([pa.field("trip", trip_type), + pa.field("fare", pa.float64())]) + batch = pa.record_batch( + [pa.array([{"begin": {"lon": -87.6, "lat": 41.8}, + "end": {"lon": -87.7, "lat": 41.9}}], + type=trip_type), + pa.array([12.5])], + schema=schema, + ) + + t = blosc2.CTable.from_arrow(schema, [batch]) + # t.col_names → ['trip.begin.lon', 'trip.begin.lat', + # 'trip.end.lon', 'trip.end.lat', 'fare'] + +**Column access** + +Nested leaves are accessed with their dotted logical name or via chained +attribute proxies:: + + t["trip.begin.lon"].mean() # Column object (fast path) + t.trip.begin.lon.max() # attribute proxy, same column + +A literal ``.``, ``/``, or ``\\`` inside an Arrow field name is escaped with a +backslash in the logical column name. For example, path segments +``("trip.info", "begin/point", "lon.deg")`` become:: + + t[r"trip\.info.begin\/point.lon\.deg"] + +Such leaves are stored with percent-encoded path segments under ``_cols``; the +example above is stored at ``_cols/trip%2Einfo/begin%2Fpoint/lon%2Edeg``. 
+ +**Filtering and expressions** + +Dotted names work everywhere a flat column name would:: + + t.where("trip.begin.lon > -87.7 and fare > 10") + t.where(t.trip.begin.lon > -87.7) + +**Select / projection** + +A struct prefix expands to all descendant leaves:: + + t.select(["trip.begin"]) # → columns trip.begin.lon, trip.begin.lat + t.select(["trip"]) # → all four trip.* leaves + +**Indexes and aggregates** + +Scalar leaf columns support all the same operations as flat columns:: + + t.create_index(col_name="trip.begin.lon") + t.where("trip.begin.lon > -87.7").nrows # uses the index + +**Row reconstruction** + +Single-row access reconstructs the original nested dict shape:: + + row = t[0] + row.trip # → {"begin": {"lon": ..., "lat": ...}, "end": {...}} + row.fare # → 12.5 + +**Inserting nested rows** + +:meth:`CTable.append` and :meth:`CTable.extend` accept either the flat dotted +form or the original nested dict / list-of-dicts shape:: + + # flat dotted keys + t.append({"trip.begin.lon": -87.6, "trip.begin.lat": 41.8, + "trip.end.lon": -87.7, "trip.end.lat": 41.9, "fare": 12.5}) + + # original nested dict (auto-flattened) + t.append({"trip": {"begin": {"lon": -87.6, "lat": 41.8}, + "end": {"lon": -87.7, "lat": 41.9}}, + "fare": 12.5}) + + # extend with a list of nested dicts + t.extend([ + {"trip": {"begin": {"lon": -87.6, "lat": 41.8}, + "end": {"lon": -87.7, "lat": 41.9}}, "fare": 12.5}, + {"trip": {"begin": {"lon": -87.5, "lat": 41.7}, + "end": {"lon": -87.8, "lat": 41.6}}, "fare": 8.0}, + ]) + +**Physical storage layout** + +Leaf columns are stored under a hierarchical path in the backing container: +``/_cols/trip/begin/lon``, ``/_cols/trip/begin/lat``, etc. Intermediate nodes +are namespaces only; no data is stored at non-leaf levels. 
+ +**Arrow / Parquet round-trip** + +:meth:`CTable.to_parquet` and :meth:`CTable.to_arrow` reconstruct the original +nested Arrow schema from the stored metadata, so round-trips are lossless:: + + t.to_parquet("out.parquet") # Arrow schema has top-level "trip" struct + Struct columns -------------- Struct columns are declared with :func:`blosc2.struct` and store one dictionary (or ``None`` when nullable) per row in batched variable-length storage. They are -also used when importing top-level Arrow/Parquet ``struct<...>`` columns:: +also used when importing top-level Arrow/Parquet ``struct<...>`` columns when +**not** using the nested-leaf flattening path described above:: from dataclasses import dataclass import blosc2 as b2 diff --git a/plans/ctable-dictionary-type.md b/plans/ctable-dictionary-type.md new file mode 100644 index 00000000..348cdc7b --- /dev/null +++ b/plans/ctable-dictionary-type.md @@ -0,0 +1,688 @@ +# Plan: CTable dictionary/categorical column type + +## Motivation + +Real-world Parquet files frequently contain Arrow dictionary-encoded columns, especially repeated string +columns. Arrow represents these as: + +```text +dictionary +``` + +Today, `CTable.from_arrow()` does not support Arrow dictionary types directly. The compatibility fallback is +to decode dictionaries to plain strings before import, but this loses the compact representation and prevents +fast integer-code indexing. + +Add a CTable dictionary column type with Arrow-like semantics: + +```python +blosc2.dictionary( + index_type=blosc2.int32(), value_type=blosc2.vlstring(), ordered=False +) +``` + +For v1, keep the implementation intentionally narrow and optimized for the common Parquet case: string +categories represented by signed 32-bit codes. + +## Goals for v1 + +- Add a public dictionary column spec. +- Support dictionary columns in CTable schemas and persistent metadata. +- Store dictionary columns as stable integer codes plus a dictionary of unique string values. 
+- Import Arrow/Parquet dictionary-encoded string columns without decoding to full strings. +- Export CTable dictionary columns back to Arrow dictionary arrays. +- Allow decoded reads by default while exposing codes and dictionary values for advanced users. +- Enable equality/membership filtering to operate on integer codes. +- Make dictionary columns indexable by indexing their codes. +- Ensure the real-world `~/Downloads/chicago-taxi.parquet` dataset can round-trip to/from Blosc2 format. + +## Non-goals for v1 + +- General value types beyond `vlstring`. +- General index types beyond internal `int32`. +- Nested dictionary columns inside list/struct fields. +- Dictionary compaction/removal of unused categories. +- Ordered comparisons (`<`, `>`, sorting) beyond storing the `ordered` flag. +- Per-chunk or per-batch dictionaries. +- Schema-less/object fallback support for dictionaries. + +## Public API + +### Column spec + +Add: + +```python +blosc2.dictionary( + index_type=blosc2.int32(), + value_type=blosc2.vlstring(), + ordered=False, + nullable=True, +) +``` + +For v1: + +- `index_type` must be `blosc2.int32()`. +- `value_type` must be `blosc2.vlstring()`. +- `ordered` is persisted and exported to Arrow, but ordered comparisons are not implemented initially. +- `nullable=True` means row slots may be null. Nulls are represented internally by code `-1`. +- `nullable=False` rejects null slots during writes/import. + +Consider an alias later: + +```python +blosc2.categorical(...) +``` + +but implement only `dictionary` first to match Arrow terminology. 
+ +### Example schema usage + +```python +from dataclasses import dataclass +import blosc2 + + +@dataclass +class Trip: + vendor: str = blosc2.field( + blosc2.dictionary(index_type=blosc2.int32(), value_type=blosc2.vlstring()) + ) + fare: float = blosc2.field(blosc2.float64()) +``` + +### Column access + +Default reads should return decoded values: + +```python +ct["vendor"][:] # ["Uber", "Lyft", None, "Uber"] +ct["vendor"][0] # "Uber" +``` + +Expose internals explicitly: + +```python +ct["vendor"].codes[:] # np.ndarray(dtype=int32), e.g. [0, 1, -1, 0] +ct["vendor"].dictionary[:] # ["Uber", "Lyft"] +``` + +Use `.dictionary` as the preferred public name for unique values because it matches Arrow terminology and the +`blosc2.dictionary(...)` spec name. A pandas-friendly `.categories` alias can be considered later, but should +not be part of the v1 API unless it falls out naturally. + +Useful methods/properties: + +```python +col.codes # fixed-width NDArray-like codes storage +col.dictionary # varlen string array of unique values +col.encode(values) # values -> int32 codes, extending dictionary if allowed +col.decode(codes) # codes -> values +col.value_to_code(value) # single value lookup; KeyError if absent +col.code_to_value(code) # single code lookup +``` + +For v1, keep mutation methods minimal and internal if needed. Public `.codes` and `.dictionary` are enough +for inspection and debugging. + +Logical slice reads should follow existing `vlstring` behavior and return Python lists, not NumPy object arrays: + +```python +ct["vendor"][:] # ["Uber", "Lyft", None, "Uber"] +``` + +## Semantics + +### Logical model + +A dictionary column is logically: + +```text +row slot -> int32 code -> dictionary value +``` + +Example: + +```text +codes: [0, 1, 0, -1] +dictionary: ["Uber", "Lyft"] +decoded: ["Uber", "Lyft", "Uber", None] +``` + +### Nulls + +Use reserved code `-1` for null row slots. + +Rationale: + +- `int32` codes give a simple, compact null representation. 
+- Code comparisons and indexes can include null slots naturally. +- This avoids a separate validity bitmap for v1 dictionary columns. + +Rules: + +- Valid category codes are `0 <= code < len(dictionary)`. +- `-1` means null slot. +- Codes `< -1` are invalid. +- If `nullable=False`, attempts to write/import null slots raise `ValueError`. +- Dictionary values themselves should not be null in v1. Null is represented only by slot code `-1`. + +### Dictionary growth + +Use an append-only global dictionary per column. + +- New string values append to the dictionary and receive the next code. +- Existing values reuse their existing code. +- Deleting table rows does not remove dictionary values. +- Updating a row to a new value may append a new dictionary value. +- Codes are stable for the life of the column. + +No automatic compaction in v1. A future explicit operation can be added: + +```python +ct["vendor"].compact_dictionary() +``` + +but this requires recoding all codes and rebuilding any indexes, so defer it. + +### Maximum cardinality + +Because v1 uses signed `int32` and reserves `-1` for null, the maximum number of categories is: + +```text +2_147_483_648 +``` + +Practically, memory/storage constraints will be hit earlier. If appending a new category would exceed +`np.iinfo(np.int32).max`, raise `OverflowError`. + +## Storage layout + +Represent a dictionary column as a logical column object wrapping two persisted components: + +```text +/ + _cols/ + vendor/ + codes # int32 NDArray, one code per row + dictionary # variable-length string storage, unique values +``` + +Exact on-disk naming should match existing table storage conventions, but the logical layout should be +column-local. Do not store dictionary values as a separate user-visible CTable column. + +### Codes storage + +- Fixed-width `int32` NDArray. +- Shape grows with table rows. +- Uses the normal column compression parameters. +- Indexes operate on this codes array. 
+ +### Dictionary value storage + +Use the existing variable-length scalar string machinery where possible: + +- `vlstring` values. +- Append-only. +- Stored under the dictionary column directory. +- Maintains insertion order as category order. + +### In-memory lookup cache + +Maintain an in-memory mapping for fast encoding: + +```python +_value_to_code: dict[str, int] +``` + +Build it lazily from persisted dictionary values when opening a table. Persist only dictionary values, not the +Python mapping. + +## Schema metadata + +Add a new spec kind, likely in `src/blosc2/schema.py`: + +```json +{ + "kind": "dictionary", + "index_type": {"kind": "int", "bits": 32, "signed": true, ...}, + "value_type": {"kind": "vlstring", ...}, + "ordered": false, + "nullable": true, + "null_code": -1 +} +``` + +The compiler should produce a `CompiledColumn` with: + +- logical type: dictionary; +- physical dtype for codes: `np.int32`; +- display width based on decoded strings, not codes where feasible. + +Schema validation should reject unsupported v1 combinations early: + +- non-`int32` index type; +- non-`vlstring` value type; +- null dictionary values; +- nullable policies incompatible with `-1` null code. + +## Core implementation tasks + +### 1. Add `DictionarySpec` + +Implement in schema/spec layer: + +- constructor helper `blosc2.dictionary(...)`; +- metadata serialization/deserialization; +- equality/repr/docs; +- validation of v1 constraints. + +Potential fields: + +```python +@dataclass(frozen=True) +class DictionarySpec(ColumnSpec): + index_type: IntSpec + value_type: VLStringSpec + ordered: bool = False + nullable: bool = True + null_code: int = -1 +``` + +### 2. 
Add dictionary column object + +Implement a column class, for example: + +```python +class DictionaryColumn: + codes: blosc2.NDArray + dictionary: _ScalarVarLenArray # or existing vlstring backing type +``` + +Required operations: + +- `__len__` +- `__getitem__` scalar/slice/list/boolean mask returning decoded values +- `__setitem__` scalar/slice/list values, encoding as needed +- `append` / `extend` for Arrow import and row appends +- `flush` if dictionary storage uses buffered batch machinery +- `close` if needed + +For v1, prioritize the operations used by CTable append/import/read paths. + +### 3. Extend table storage + +Add storage factory methods analogous to existing list/varlen methods: + +```python +storage.create_dictionary_column(name, spec, cparams=None, dparams=None) +storage.open_dictionary_column(name, spec, ...) +``` + +These create/open both physical components (`codes`, `dictionary`) under the logical column. + +### 4. Extend CTable schema compilation and column creation + +Update CTable creation paths to detect `DictionarySpec`: + +- schema compiler; +- `_create_columns` / equivalent new-table creation; +- `_create_arrow_import_columns`; +- open-from-storage path; +- row append/update paths; +- column widths/display. + +Dictionary columns should be logical `ct.col_names` entries just like ordinary columns. + +### 5. Decoded read/write behavior + +When assigning Python values: + +```python +ct.append({"vendor": "Uber"}) +ct["vendor"][3] = "Lyft" +ct["vendor"][4:6] = ["Uber", None] +``` + +Encoding behavior: + +- If value is `None`: code `-1` if nullable, otherwise raise. +- If value is `str` and exists: use existing code. +- If value is `str` and missing: append dictionary value, assign new code. +- If value is not `str`/`None`: raise `TypeError`. + +When assigning raw codes, require explicit codes API. Do not silently accept integers via logical column writes, +because integers could be real category values in future dictionary types. 
+ +## Arrow/Parquet interoperability + +### Import from Arrow + +Map Arrow dictionary columns as follows: + +```text +dictionary + -> blosc2.dictionary(index_type=blosc2.int32(), value_type=blosc2.vlstring(), ordered=X) +``` + +Accepted Arrow index types for v1: + +- signed integer indices: `int8`, `int16`, `int32`, `int64`; +- unsigned integer indices: `uint8`, `uint16`, `uint32`, `uint64`, provided all values fit in signed + `int32`; +- normalize internally to `int32`; +- reject if category count or any index value does not fit signed `int32`. + +Accepted Arrow value types for v1: + +- `string`, `large_string`, `utf8`, `large_utf8`; +- normalize internally to `vlstring`. + +Rejected for v1: + +- dictionary values of binary, numeric, struct, list, etc.; +- nested dictionary arrays inside list/struct; +- unsigned index arrays containing values that do not fit in signed `int32`. + +### Chunked Arrow arrays and dictionary unification + +Arrow chunked arrays and Parquet row groups may carry different dictionaries per chunk. CTable v1 should use +one global dictionary per column. + +Import algorithm: + +1. For each incoming Arrow dictionary array chunk: + - read its dictionary values; + - map chunk-local category values to global codes; + - translate chunk indices to global int32 codes; + - translate Arrow nulls to `-1`. +2. Append translated codes to the CTable codes storage. +3. Append new category values to the global dictionary as discovered. + +Preserve first-seen category order. This is deterministic for a given input stream and works well for append-only +semantics. + +If `ordered=True` and chunks have different dictionary orders, global first-seen order may not preserve the +semantic order. For v1: + +- preserve and export `ordered=True` only when the importer can verify all chunk dictionaries have the same + order for existing values; +- otherwise raise `ValueError`. 
Do not silently downgrade to `ordered=False`, because `ordered=True` carries + semantic meaning and silently changing it could make comparisons/sorts incorrect later. + +### Arrow schema inference + +Update `_arrow_type_to_spec()`: + +- recognize top-level Arrow dictionary type; +- return `DictionarySpec` for supported v1 string dictionaries; +- raise clear `TypeError` for unsupported dictionary variants. + +Do not decode dictionary type to plain string inside core `CTable.from_arrow()` when dictionary support is +available. The CLI can later expose a flag to force decoding if desired. + +### Arrow batch writing into CTable + +Update `_write_arrow_batch()`: + +- if compiled column is dictionary: + - accept Arrow dictionary arrays and use the unification algorithm; + - also optionally accept plain string arrays by encoding strings into the dictionary; + - reject unsupported types. + +This allows appending plain strings to an existing dictionary CTable column. + +### Export to Arrow + +When `iter_arrow_batches()` sees a dictionary column, emit Arrow dictionary arrays: + +```text +dictionary +``` + +Implementation approach: + +- Arrow dictionary values: `pa.array(dictionary_values, type=pa.string())` or `pa.large_string()`? Use `pa.string()` + for v1 unless a value exceeds Arrow string limits, then use `large_string()`. +- Indices: `pa.array(codes, type=pa.int32())`, with null mask for `codes == -1`. +- Construct `pa.DictionaryArray.from_arrays(indices, dictionary, ordered=spec.ordered)`. + +For slices/batches, reuse the full column dictionary rather than creating per-batch dictionaries. This preserves +stable codes and simplifies export. + +### Parquet CLI behavior + +Once core dictionary support exists: + +- Default CLI import should preserve supported Arrow dictionary string columns as dictionary CTable columns. 
+- Add an escape hatch: + +```bash +parquet-to-blosc2 --decode-dictionaries input.parquet output.b2d +``` + +or equivalent if users want plain `vlstring` columns. + +The default should favor preserving dictionary encoding because it is compact and closer to the original Arrow +schema. + +## Query and expression support + +### Equality + +For dictionary column `vendor`: + +```python +ct["vendor"] == "Uber" +``` + +should translate to: + +```python +ct["vendor"].codes == code_for("Uber") +``` + +If the value is absent from the dictionary, return an all-false boolean expression/selection without scanning. + +Null equality: + +```python +ct["vendor"] == None +``` + +maps to: + +```python +codes == -1 +``` + +Use whatever null comparison idiom is already preferred in CTable expressions; avoid encouraging `== None` in +user docs if there is an `is_null()` API. + +### Membership + +```python +ct["vendor"].isin(["Uber", "Lyft"]) +``` + +maps to code membership: + +```python +codes in [0, 1] +``` + +Values absent from the dictionary are ignored. If all requested values are absent, return all-false. + +### Ordered comparisons + +For v1: + +- If `ordered=False`, `<`, `<=`, `>`, `>=` should raise `TypeError` for dictionary columns. +- If `ordered=True`, still defer implementation unless it is trivial to map to code comparisons. Document that + ordered comparisons are not supported in v1 even though the flag is stored/exported. + +This avoids ambiguous semantics between dictionary order and lexical string order. + +## Indexing support + +Dictionary columns should be indexed by codes. + +### Index creation + +User API should remain logical: + +```python +ct.create_index("vendor") +``` + +Internally: + +- detect `vendor` is dictionary; +- create the physical index on `vendor.codes`; +- store public index metadata under the logical column name `vendor`; +- mark the index as dictionary-aware so query planning maps values to codes before using it. 
+ +The public API should hide the code-index detail. On disk, index files may include an explicit `codes` suffix, +for example `__index__.vendor.codes...`, to avoid ambiguity and make debugging easier. + +Avoid requiring users to write: + +```python +ct["vendor"].codes.create_index() +``` + +though exposing code-level indexes for debugging is fine. + +### Query planning with indexes + +For equality: + +1. Look up the queried string in the dictionary. +2. If present, query the integer index for that code. +3. If absent, return empty result immediately. + +For membership: + +1. Map present values to codes. +2. Query the integer index for those codes. +3. Ignore absent values. + +For nulls: + +- code `-1` can be included in the code index. +- `is_null()` queries use code `-1`. + +### Index maintenance + +Because dictionary values are append-only and codes are stable: + +- existing index entries do not need recoding when new categories are appended; +- appending rows updates the code index just like appending rows to an integer column; +- deleting rows follows existing CTable valid-row semantics; +- dictionary compaction, if added later, must invalidate/rebuild indexes. + +## Persistence and compatibility + +### Opening existing tables + +Existing tables do not contain dictionary specs, so no migration is needed. + +### Versioning + +Add a schema metadata version bump if the CTable schema format has one. Older versions of python-blosc2 will not +understand `kind: dictionary`; they should fail clearly when opening such tables. + +### Robustness checks on open + +When opening a persisted dictionary column: + +- validate codes dtype is int32; +- validate dictionary storage exists; +- validate dictionary values are strings and contain no null entries; +- optionally validate codes are `-1` or within dictionary bounds. Full validation may be expensive; provide a + debug/validation path rather than doing it unconditionally for huge tables. 
+
+## Testing plan
+
+### Unit tests for spec/schema
+
+- `blosc2.dictionary()` creates expected spec.
+- Unsupported `index_type` raises.
+- Unsupported `value_type` raises.
+- Metadata roundtrip preserves `ordered`, `nullable`, `null_code`.
+- Dataclass schema compilation supports dictionary fields.
+
+### CTable behavior tests
+
+- Create in-memory CTable with dictionary column.
+- Append strings and nulls.
+- Repeated strings reuse codes.
+- New strings append dictionary values.
+- Decoded scalar/slice reads work.
+- `.codes[:]` and `.dictionary[:]` expose expected internals.
+- `nullable=False` rejects nulls.
+- Invalid value types raise.
+- Persistent `.b2d`/`.b2z` tables reopen correctly.
+
+### Arrow import/export tests
+
+- Import `dictionary<int32, string>`.
+- Import `dictionary<int8, string>`.
+- Import `dictionary<int16, string>`.
+- Import `dictionary<int64, string>` when values fit int32.
+- Import unsigned dictionary indices when values fit signed int32.
+- Reject too-large signed/unsigned dictionary indices or category counts.
+- Import chunked arrays with different dictionaries and verify global unification.
+- Preserve nulls as `-1` internally and Arrow nulls on export.
+- Export emits Arrow dictionary type with int32 indices and string values.
+- Parquet roundtrip preserves logical values.
+
+### Query/index tests
+
+- Equality filter on present value returns matching rows.
+- Equality filter on absent value returns no rows without scanning if possible.
+- Membership filter works.
+- Null filter works.
+- `ct.create_index("dict_col")` builds code index.
+- Equality/membership use the code index.
+- Appending rows after index creation maintains index correctness.
+
+### CLI tests
+
+- `parquet-to-blosc2` imports dictionary string column as dictionary column.
+- Export produces Parquet/Arrow dictionary column.
+- Optional dictionary-decoding flag imports as `vlstring` instead.
+- Unsupported dictionary value type reports a clear error or decodes only if explicitly requested.
+- Real-world acceptance test: `~/Downloads/chicago-taxi.parquet` imports to Blosc2, exports back to Parquet, + and round-trip comparison succeeds for imported/exported columns. + +## Suggested implementation order + +1. Add `DictionarySpec` and public `blosc2.dictionary()` helper. +2. Implement dictionary column storage wrapper with codes + vlstring dictionary. +3. Integrate dictionary columns into CTable creation/open/read/write paths. +4. Add decoded reads and append/set encoding. +5. Add Arrow dictionary import with global dictionary unification. +6. Add Arrow export as `pa.DictionaryArray`. +7. Add equality/membership expression translation. +8. Add dictionary-aware index creation and query usage. +9. Add CLI preservation by default and optional decode flag. +10. Add docs/examples. + +## Resolved design decisions + +These decisions are part of the v1 plan: + +1. Expose `nullable` on the dictionary spec, defaulting to `True`. +2. Accept Arrow unsigned dictionary indices if all values fit in signed `int32`; normalize internally to + `int32`. +3. Raise for ordered Arrow dictionaries with incompatible/differing chunk dictionary order. Do not silently + downgrade to unordered. +4. Make the Parquet CLI preserve supported dictionary columns by default. Provide an opt-out flag such as + `--decode-dictionaries` for users who want plain `vlstring` columns. +5. Use `.dictionary` as the preferred public property for unique values. Consider `.categories` only as a + future alias. +6. Return Python lists for logical slice reads, following existing `vlstring` behavior. +7. Keep `ct.create_index("vendor")` logical and hide code-index details from the public API. On-disk index + artifacts may include a `codes` suffix for clarity. 
diff --git a/plans/ctable-nested-fields.md b/plans/ctable-nested-fields.md new file mode 100644 index 00000000..5a2778f6 --- /dev/null +++ b/plans/ctable-nested-fields.md @@ -0,0 +1,251 @@ +# CTable nested fields via physical leaf columns + +## Summary + +Add first-class support for nested schemas in `CTable` by **physically flattening leaf fields** into real persisted columns, while preserving logical nested structure for row I/O and Arrow/Parquet roundtrips. + +Key idea: + +- Logical path: `trip.begin.lon` +- Physical storage path in container: `/_cols/trip/begin/lon` +- Canonical root field name: `""` (empty string) +- Display alias for root (optional): `/` + +This keeps analytics/indexing fast (leaf = ordinary column), and matches `.b2d` / `.b2z` container layout naturally. + +**Status: core implementation complete.** All acceptance criteria are met. +Remaining work is captured in the [Future work](#future-work) section below. + +--- + +## Goals + +1. Support nested struct/list schemas without storing struct leaves as opaque varlen/object blobs. +2. Enable columnar analytics on scalar leaves using existing `CTable` machinery: + - filters (`where`) + - lazy expressions + - aggregates (`sum/min/max/mean/std`) + - indexes + - sorting/grouping paths already supported for scalar columns +3. Preserve nested logical row interface (dict/list reconstruction on read). +4. Keep backward compatibility for existing flat tables and existing nested-as-varlen tables. + +## Non-goals (phase 1) + +1. Full list-element relational semantics (`explode`, SQL-like unnests) for query planner. +2. Indexing directly on list-valued paths. +3. Breaking on-disk compatibility of existing tables. 
+ +--- + +## Proposed model + +## 1) Path model + +Define a canonical logical field-path type: + +- Root: `""` +- Path segments: `("trip", "begin", "lon")` +- Dotted display key: `trip.begin.lon` + +Add helpers: + +- `split_field_path(str) -> tuple[str, ...]` ✅ implemented (`ctable_storage.py`; backslash-escape aware) +- `join_field_path(tuple[str, ...]) -> str` ✅ implemented (`ctable_storage.py`; escapes literal `.`, `/`, and `\\`) +- escaping/unescaping for literal `.` and `/` in field names ✅ implemented for logical names via backslash escaping and for physical storage via percent-encoded path segments + +Recommendation: + +- Canonical internal identity: tuple segments +- Dotted names only as user syntax +- Physical storage path built from escaped segments ✅ literal `.`, `/`, `%`, and `\\` inside segments are percent-encoded + +## 2) Physical layout ✅ implemented + +Persist scalar leaves as standard column arrays under `_cols` hierarchy: + +- `/_cols/trip/begin/lon` +- `/_cols/trip/begin/lat` +- `/_cols/trip/begin/time` +- `/_cols/payment/fare` + +Intermediate nodes are namespaces only (no data arrays). + +For lists: + +- Keep existing `ListArray` physical representation for list leaves. ✅ +- For `list>`, phase 1 keeps list cell storage as list payload (no explode). ✅ + +## 3) Schema metadata ✅ implemented + +Extend schema serialization with nested mapping metadata, e.g.: + +- logical path -> physical column token/path ✅ (`schema.metadata["nested"]` dict) +- physical column -> storage path ✅ (`schema.metadata["nested"]["physical_to_storage"]`) +- root logical alias metadata when needed ✅ +- row reconstruction flag when nested Arrow structs were flattened ✅ + +Leaf spec details such as kind, dtype, nullability, and scalar/list/dictionary behavior remain in the standard schema column specs rather than being duplicated in `metadata["nested"]`. ✅ + +Keep `CompiledSchema.columns` as the ordered list of **physically stored leaf columns**. 
`CompiledSchema.columns_by_name` may additionally contain virtual logical aliases, such as top-level `StructSpec` entries used for Arrow/Parquet schema roundtrips; these aliases are not stored columns and do not appear in `CTable.col_names`. ✅ + +--- + +## API behavior + +## Column access ✅ implemented + +Allow both: + +- `t["trip.begin.lon"]` ✅ +- `t.trip.begin.lon` (via lightweight namespace proxy objects) ✅ (`_NestedColumnNamespace`; `_StructPathColumn` is used for struct-prefix virtual access such as `t["trip"]`) + +`Column` operations on scalar leaves behave exactly like current top-level scalar columns. ✅ + +## Row materialization ✅ implemented + +- `t[i]` reconstructs nested dict/list shape from leaves and list payload columns. ✅ +- Top-level unnamed field (`""`) is handled as root container. ✅ + +## Select/projection ✅ implemented + +`select([...])` accepts: + +- leaf paths (`"trip.begin.lon"`) ✅ +- struct prefix (`"trip.begin"`) that expands to descendant leaves ✅ + +## Expressions ✅ implemented + +`where("trip.begin.lon > -87.7 and payment.fare > 10")` supported by path rewriting to operand IDs or canonical flat leaf names. ✅ + +--- + +## Implementation plan + +## Phase 0 — design/compat scaffolding + +1. ✅ Path splitting/joining helpers (`_column_name_to_relpath` + inverse in schema metadata). +2. ✅ New schema metadata version (`schema.metadata["nested"]` with `version` key; backward-compatible read of old flat schemas). +3. ⚠️ Feature flag (internal) to enable nested physical layout for new tables — not a separate flag; nested layout is activated implicitly when the input schema contains struct fields. + +## Phase 1 — schema compilation flattening + +1. ✅ Schema compiler flattens nested structs into physical leaf columns (`schema_compiler.py`, `_flatten_arrow_struct_schema`). +2. ✅ Nested path mapping kept for reconstruction/export (`logical_to_physical`, `physical_to_storage`, optional root alias, and `reconstruct_rows` in nested metadata). 
Leaf type details remain in normal schema column specs. +3. ✅ Deterministic flat column keys — canonical dotted form used throughout. +4. ✅ Nullable propagation rules explicit (propagated from parent struct nullability). + +## Phase 2 — storage backend + +1. ✅ `ctable_storage` create/open accept hierarchical column paths. +2. ✅ Arrays stored in `/_cols///...` hierarchy. +3. ✅ Reopen logic uses stored schema column names and maps dotted names back to hierarchical `_cols/...` paths. +4. ~~Migration-safe fallback for legacy flat `_cols/` tables~~ — **skipped**: no code ever shipped writing dotted names as flat paths, so no migration is needed. + +## Phase 3 — read/write data paths + +1. ✅ `append`/`extend` flatten input nested dicts into leaf columns (`_flatten_nested_dict`, updated `_normalize_row_input` and `extend`). +2. ✅ `__getitem__(int)` and row iterators reconstruct nested rows (`_materialize_row`, `reconstruct_rows` flag). +3. ✅ Fast-path for already-flat rows preserved. + +## Phase 4 — column resolution and expression engine + +1. ✅ Column resolver from dotted path string → physical leaf column. +2. ✅ Attribute path proxy `t.trip.begin.lon` via `_StructPathColumn`. +3. ✅ Expression parsing includes nested leaves (`_where_expression_operands`). +4. ✅ List/object leaf expressions restricted appropriately in phase 1. + +## Phase 5 — indexes and analytics + +1. ✅ `create_index(col_name="trip.begin.lon")` works on scalar leaves. +2. ✅ Index catalog uses canonical dotted target path. +3. ✅ Aggregates (`mean`, `sum`, `min`, `max`, `std`) and `sort_by` work on resolved leaf NDArrays. + +## Phase 6 — Arrow/Parquet import/export + +1. ✅ Import: nested Arrow schema flattened into leaf storage + nested metadata (`from_arrow`, `_flatten_arrow_struct_*`). +2. ✅ Export: Arrow nested schema rebuilt from leaves (`to_arrow`, `to_parquet` reconstruct struct hierarchy). +3. ✅ Dictionary/timestamp/null semantics unchanged. + +## Phase 7 — docs/tests/perf + +1. 
Tests: + - ✅ Append/reopen/roundtrip for nested rows (`tests/ctable/test_nested_append.py`, `test_nested_access_storage.py`). + - ✅ `where`/`select`/`index`/`aggregate` on nested scalar leaves (covered in existing ctable test suite). + - ✅ Compatibility: legacy flat tables still pass all tests. + - ✅ Path parsing and escaping tests for literal `.` and `/` in nested Arrow field names (`tests/ctable/test_nested_access_storage.py`). +2. Docs: + - ✅ Nested path syntax, column access, filtering, Arrow/Parquet roundtrip (`doc/reference/ctable.rst`, "Nested fields" section). + - ✅ Method-level docstrings updated: `append`, `extend`, `__getitem__`, `where`, `select`, `rename_column`, `create_index`, `sort_by`, `from_arrow`, `from_parquet`. +3. Benchmarks: + - ✅ Nested leaf filter/index performance vs flat columns (`bench/ctable/bench_nested_filter_index.py`); overhead is negligible. + +--- + +## Compatibility and migration + +1. ✅ Existing tables remain readable/writable as-is. +2. ✅ Nested layout activated automatically when schema contains struct fields. +3. Optional utility later: migrate legacy nested-varlen columns to flattened-leaf layout (see Future work). + +--- + +## Acceptance criteria ✅ all met + +1. ✅ Can ingest taxi-like schema and persist leaves under hierarchical `_cols/...` paths. +2. ✅ `t["trip.begin.lon"].mean()` works and matches Arrow/Awkward reference. +3. ✅ `t.where("payment.fare > 20").nrows` works. +4. ✅ `t.create_index(col_name="trip.begin.time")` works for scalar leaf. +5. ✅ `t[i]` returns nested row shape equivalent to input schema. +6. ✅ Existing non-nested/legacy tables keep current behavior unchanged. + +--- + +## Future work + +### FW-1 — Field-name escaping for literal `.` and `/` + +**Status**: implemented. + +Logical nested paths use unescaped `.` as the separator. Literal `.`, `/`, and `\\` +inside a field-name segment are represented with backslash escaping in the logical +column name, e.g. 
Arrow path segments `("trip.info", "begin/point", "lon.deg")`
+become `trip\\.info.begin\\/point.lon\\.deg`.
+
+Physical storage percent-encodes structural characters inside each path segment before
+joining segments under `_cols`, e.g. the same leaf is stored at
+`_cols/trip%2Einfo/begin%2Fpoint/lon%2Edeg`.
+
+### FW-2 — List-struct analytics (explode / unnest)
+
+**Status**: deferred (non-goal for phase 1).
+
+`list<struct<...>>` fields are currently stored as opaque list-payload columns. Future
+work would:
+
+- Define an `explode` operation that creates a row-per-element view.
+- Enable `where` / `create_index` on paths inside list elements.
+- Design SQL-style unnest semantics.
+
+### FW-3 — Migration utility for legacy nested-varlen tables
+
+**Status**: deferred; likely unnecessary unless user demand appears.
+
+Because `CTable` is newly released, few if any production tables are expected to exist
+with top-level Arrow `struct<...>` columns imported as opaque `blosc2.struct` varlen
+columns. Existing tables remain readable as-is, but they will not automatically gain
+nested-leaf analytics.
+
+Recommended path: re-import the original Arrow/Parquet source with a python-blosc2
+version that supports nested-leaf flattening. This creates the new physical leaf layout
+and nested metadata directly.
+
+A future `CTable.migrate_nested_columns()` utility could still be considered if users
+have important legacy tables without access to the original source data. Such a utility
+would need to:
+
+- Detect columns whose schema spec is `struct` with a known logical type.
+- Re-import/materialize them as flattened leaf columns.
+- Update schema metadata and physical layout atomically.
+- Leave `list<struct<...>>` migration out of scope until list-struct analytics are
+  designed separately.
diff --git a/plans/ctable-separate-nested-cols.md b/plans/ctable-separate-nested-cols.md
new file mode 100644
index 00000000..edbcfd33
--- /dev/null
+++ b/plans/ctable-separate-nested-cols.md
@@ -0,0 +1,686 @@
+# CTable separate nested columns for list-struct data
+
+## Summary
+
+Extend CTable nested storage so Arrow/Parquet datasets that are physically stored
+as an unnamed top-level `list<struct<...>>` can be imported as a normal CTable
+whose rows are the **elements of that root list** and whose struct leaves are
+ordinary nested CTable columns.
+
+This is especially important for Awkward-style Parquet files such as Chicago
+taxi, whose top-level schema is effectively:
+
+```text
+"": list<struct<trip: struct<...>,
+ payment: struct<...>,
+ company: ...
+>>
+```
+
+For this case, the outer unnamed list is treated as a physical/chunking artifact
+of the Parquet encoding, not as a semantic CTable column. The imported table
+should look and behave like:
+
+```python
+ct["trip.begin.lon"]
+ct["payment.fare"]
+ct.where("payment.fare > 20")
+ct.nrows == total_number_of_root_list_elements
+```
+
+No user-facing `column_0` and no required `ct.explode()` for this root-list case.
+
+The mental model is:
+
+```text
+unnamed list<struct<...>> -> root record stream -> regular nested CTable rows
+```
+
+Named `list<struct<...>>` fields inside an otherwise normal parent table remain
+typed `ListArray` columns by default. Future `explode()` support can expose
+those named repeated fields as element-row views when parent/child analytics are
+needed.
+
+---
+
+## Relationship to existing nested-field work
+
+`plans/ctable-nested-fields.md` already covers:
+
+- logical dotted paths;
+- escaping literal `.`, `/`, and `\\`;
+- physical hierarchical `_cols/...` storage paths;
+- top-level `struct<...>` flattening into leaf columns;
+- nested row reconstruction for scalar struct leaves;
+- Arrow/Parquet schema roundtrips for top-level structs.
+ +This plan extends that machinery to the special and common case where the whole +Parquet file is an unnamed top-level `list>` record stream. + +--- + +## Goals + +1. Import a single unnamed top-level `list>` as a regular CTable row + stream, with the list elements becoming CTable rows. +2. Physically store scalar leaves of the element struct as separate CTable + columns, typically NDArrays or existing typed CTable column kinds. +3. Preserve nested logical field paths, e.g. `trip.begin.lon`, `payment.fare`. +4. Avoid `column_0` in the user-facing API for unnamed root-list datasets. +5. Keep named `list>` fields as typed `ListArray` columns by default. +6. Store enough provenance metadata to explain that an unnamed root list was + flattened, without requiring exact original Parquet row grouping roundtrip. +7. Make separated nested-column import the default for Parquet inputs that qualify, + with explicit opt-out for schema-fidelity workflows. + +## Non-goals for first implementation + +1. Exact reconstruction of the original Parquet row grouping for unnamed root + lists. +2. In-place migration of existing opaque `ListArray` list-struct columns. +3. Full `explode()` / SQL-style unnesting for named repeated fields. +4. Recursive flattening of nested lists inside element structs. +5. Making Awkward Array a dependency. + +--- + +## Core distinction: root record stream vs named repeated field + +### Case 1: single unnamed top-level `list>` + +Input schema: + +```text +"": list, payment: struct<...>, company: ...>> +``` + +Interpretation: + +- The unnamed top-level list is a physical container/chunking artifact. +- Its elements are the logical records. +- The element struct is the logical root schema. +- The imported CTable row count is the total number of list elements, not the + number of original Parquet rows. 
+ +User-facing result: + +```text +trip.sec +trip.begin.lon +trip.begin.lat +trip.begin.time +trip.end.lon +trip.end.lat +trip.end.time +trip.path # nested list inside element; kept as a ListArray initially +payment.fare +payment.tips +payment.total +payment.type +company +``` + +Example: + +```python +ct = blosc2.CTable.from_parquet("chicago-taxi.parquet", separate_nested_cols=True) +ct["trip.begin.lon"].mean() +ct.where("payment.fare > 20") +``` + +No `ct.explode()` is needed because `ct` is already in the element row space. + +### Case 2: named `list>` inside a parent table + +Input schema: + +```text +user_id: int64 +events: list> +``` + +Interpretation: + +- Parent rows are semantically meaningful. +- `user_id` has one value per parent row. +- `events` has one list per parent row. + +Default representation: + +```text +user_id: NDArray +events: ListArray(list(struct(...))) +``` + +This requires no separate parent-offset metadata for ordinary CTable use: + +```python +ct["user_id"] +ct["events"] +``` + +Offsets only become important if/when a future `ct.explode("events")` view is +implemented and needs to map event elements back to parent rows. + +--- + +## Proposed metadata + +For unnamed-root flattening, store provenance metadata. This is proposed shape, +not final schema: + +```json +{ + "nested": { + "version": 2, + "original_root": { + "kind": "unnamed_list_struct", + "field_name": "", + "preserve_grouping": false + } + } +} +``` + +Meaning: + +- `kind = "unnamed_list_struct"`: source had an unnamed top-level list of struct. +- `field_name = ""`: canonical Arrow root field name. +- `preserve_grouping = false`: original Parquet row/list grouping is not part of + the logical CTable model and is not guaranteed to roundtrip exactly. 
+ +Future optional metadata if exact grouping is requested: + +```json +{ + "original_root": { + "kind": "unnamed_list_struct", + "field_name": "", + "preserve_grouping": true, + "offsets": "_root._offsets", + "valid": "_root._valid" + } +} +``` + +But first implementation should not store original offsets by default. + +--- + +## Physical storage model for unnamed root list + +Given: + +```text +"": list, + path: list> + >, + payment: struct, + company: dictionary +>> +``` + +Store scalar struct leaves as ordinary CTable physical columns: + +```text +/_cols/trip/sec +/_cols/trip/begin/lon +/_cols/trip/begin/lat +/_cols/trip/begin/time +/_cols/payment/fare +/_cols/payment/tips +/_cols/payment/total +/_cols/company +``` + +Nested list fields inside the element struct remain typed list columns in phase +1: + +```text +/_cols/trip/path +``` + +where `trip.path` is a `ListArray` with one cell per logical trip row. + +All visible columns in the imported CTable have the same row count: + +```text +nrows == total number of elements in the unnamed root list +``` + +Leaf types may be: + +- fixed-width numeric/bool/timestamp NDArrays; +- dictionary columns; +- variable-length scalar columns (`vlstring`, `vlbytes`); +- typed `ListArray` columns for nested list fields; +- `ObjectArray` only as fallback for unsupported/heterogeneous data. + +--- + +## Named list-struct fields: ListArray vs ObjectArray + +For named `list>` fields, prefer typed `ListArray` by default: + +```text +events: ListArray(spec=list(struct({"time": timestamp(...), "amount": float64()}))) +``` + +Reasons: + +- Preserves Arrow logical type better than schema-less objects. +- Keeps field/type metadata available for future `explode()`. +- Roundtrips to Arrow/Parquet more naturally. +- Supports both `serializer="msgpack"` and `serializer="arrow"` tradeoffs. 
+ +Use `ObjectArray` only as fallback when: + +- the Arrow type is unsupported by typed `ListArray`; +- the list contents are heterogeneous; +- item schema cannot be represented by `ListSpec`; +- the user explicitly requests object fallback. + +--- + +## Import behavior + +### Phase A: default import with opt-out + +The feature started as opt-in, but is now enabled by default for +`CTable.from_parquet()` and `parquet-to-blosc2` when the Parquet schema qualifies +as a single unnamed root `list>`. The same `separate_nested_cols` +default also lets ordinary top-level Arrow/Parquet `struct<...>` fields follow +`CTable.from_arrow()` semantics and flatten recursively into dotted leaf columns +without changing row cardinality: + +```text +CTable.from_parquet(...) +parquet-to-blosc2 input.parquet output.b2d +``` + +Opt out when closer fidelity to the original Parquet row/schema shape is desired: + +```text +CTable.from_parquet(..., separate_nested_cols=False) +parquet-to-blosc2 ... --no-separate-nested-cols +``` + +`CTable.from_arrow(..., separate_nested_cols=True)` remains available for direct +Arrow inputs. Named list fields, including named `list>`, remain +typed `ListArray` columns by default. + +### Phase B: eligibility for root flattening + +Root flattening applies when: + +1. the Arrow schema has exactly one top-level field; +2. the top-level field name is `""` or is otherwise known to be the canonical + unnamed root; +3. the top-level field type is `list>` or `large_list>`. + +When all conditions hold, flatten `array.values` (the struct element array) into +CTable columns and use `len(array.values)` as the CTable row count. + +### Phase C: import algorithm for unnamed root + +1. Read Arrow list array/chunked array. +2. For each batch/chunk, access the flattened element struct array via + `list_array.values`. +3. Recursively flatten struct fields into leaf arrays. +4. Create/append CTable columns for each leaf. +5. 
For nested list fields inside the element struct, create/append typed + `ListArray` columns with one list cell per element row. +6. Avoid `to_pylist()` for scalar leaves whenever possible. +7. Store `original_root` provenance metadata. + +The original top-level list offsets do not need to be stored by default. + +--- + +## Row access and logical API + +For unnamed-root flattening, `ct[i]` returns a row representing one element of +the original root list: + +```python +row = ct[i] +row.trip["begin"]["lon"] +row.payment["fare"] +``` + +Column access is ordinary nested CTable access: + +```python +ct["trip.begin.lon"] +ct.trip.begin.lon +ct["payment.fare"] +``` + +Filtering and analytics operate directly: + +```python +ct.where("payment.fare > 20") +ct["trip.begin.lon"].mean() +ct.select(["trip.begin", "payment.fare"]) +``` + +No `column_0` and no required `explode()` for this case. + +--- + +## Arrow/Parquet export behavior + +Exact reproduction of the original unnamed `list>` Parquet row +layout is not a goal by default. Blosc2 and Parquet have different storage +models; import/export should preserve the logical data decently rather than +promise byte- or schema-shape-exact Parquet roundtrips. + +Default export may write the clean logical table: + +```text +trip: struct<...> +payment: struct<...> +company: ... +``` + +rather than wrapping rows back into an unnamed top-level `list>`. + +A future compatibility option could preserve and re-emit the original root-list +row grouping, but only if a concrete user need appears. If added, original +offsets/validity would need to be stored at import time. + +--- + +## Future `explode()` semantics for named repeated fields + +`explode()` remains useful for named list fields inside parent tables, but is not +required for unnamed-root record streams. 
+ +Example future API: + +```python +events = ct.explode("events") +events["time"] +events["amount"] +events["_parent"] # optional parent row index +events["_ordinal"] # optional position inside parent list +``` + +This is a logical view over a repeated field and changes row granularity from +parent rows to element rows. It may require offsets or a generated parent-index +array. This is deferred until after root record stream flattening is working. + +--- + +## Storage and CTable integration + +### TreeStore / nested CTable compatibility + +A CTable with separated nested columns must remain self-contained when stored as +an object/subtree inside a `TreeStore`, including compact `.b2z` stores. All +physical leaves, indexes, and metadata must live under the CTable root and be +addressed relative to that root: + +```text +/some_table/_meta +/some_table/_valid_rows +/some_table/_cols/trip/sec +/some_table/_cols/trip/begin/lon +/some_table/_cols/payment/fare +``` + +Opening `/some_table` as a regular CTable should reconstruct the same logical +schema and expose the same APIs (`ct[i]`, `ct.where(...)`, `to_arrow()`) without +requiring state outside the CTable subtree. Reopen logic should continue to rely +on the CTable schema/manifest rather than scanning arbitrary outer TreeStore +children. + +For `.b2z`, direct-offset/open behavior must work for all separated nested +leaves, just like current hierarchical `_cols/...` CTable leaves. + +### Schema representation + +Recommended for unnamed-root flattening: + +- `CompiledSchema.columns` contains the physical, user-visible element-row leaf + columns. +- `CTable.col_names` contains logical nested paths such as `trip.begin.lon` and + `payment.fare`. +- `metadata["nested"]["original_root"]` records that these columns came from an + unnamed top-level list of struct. +- There are no user-visible `_offsets` / `_valid` columns by default. 
+ +--- + +## Indexing + +For unnamed-root flattened tables, indexes work like normal CTable indexes: + +```python +ct.create_index("payment.fare") +ct.where("payment.fare > 20") +ct.create_index("trip.begin.time") +``` + +For named repeated fields, element-level indexes should be deferred until +`explode()` semantics are implemented. + +--- + +## Implementation phases + +### Phase 0 — design scaffolding + +- [x] Define `original_root` provenance metadata. +- [x] Add helpers to detect a single unnamed top-level `list>` schema. +- [x] Add helpers to flatten Arrow `ListArray.values` struct arrays into leaf arrays. + +### Phase 1 — unnamed-root record stream import + +- [x] Implement `separate_nested_cols=True` support for single unnamed top-level + `list>`; make it the default for `CTable.from_parquet()` and the CLI. +- [x] Import element struct leaves as normal nested CTable columns. +- [x] Keep nested list fields inside the element struct as typed `ListArray` columns. +- [x] Avoid `to_pylist()` for scalar leaves; fixed-width leaves use the Arrow → NumPy path. +- [x] Set `ct.nrows` to the total element count. +- [x] Store `original_root` provenance metadata. +- [x] Add `CTable.from_parquet(max_rows=...)`; for unnamed-root imports the limit + applies to flattened element rows. + +Acceptance tests: + +- [x] Simple unnamed `list>` imports to dotted CTable columns. +- [x] Chicago taxi-style sample imports without `column_0` via `CTable.from_parquet()` + and `parquet-to-blosc2`. +- [x] `CTable.from_parquet(..., max_rows=N)` limits ordinary rows and flattened + unnamed-root element rows. +- [x] `ct.where("payment.fare > 20")` works directly. +- [x] `ct["trip.begin.lon"].mean()` works directly. +- [x] Reopen persistent `.b2d` / `.b2z`. +- [x] `to_arrow()` emits a clean logical nested table. +- [x] CLI `--no-separate-nested-cols` preserves ordinary top-level structs as + singleton-list columns for closer schema fidelity. 
+- [x] CLI default `--separate-nested-cols` flattens ordinary top-level structs into
+  dotted columns consistently with `CTable.from_arrow()`.
+
+### Phase 2 — nested list children inside root elements
+
+- [x] Ensure fields like `trip.path: list<struct<...>>` become typed `ListArray`
+  columns with one cell per element row.
+- [x] Support `serializer="msgpack"` and `serializer="arrow"` for these list
+  columns.
+- [x] Add fast Arrow import path for Arrow-serialized list columns via
+  `ListArray.extend_arrow()`, avoiding Python object materialization.
+- [x] Make Arrow the default list serializer for Parquet imports in both
+  `CTable.from_parquet()` and `parquet-to-blosc2`; msgpack remains available for
+  read-time PyArrow independence.
+- [x] Add serializer-aware batching defaults for the CLI: Arrow uses the sampled
+  flattened Parquet-batch scale, while msgpack uses
+  `compute_chunks_blocks(estimated_nrows).blocks[0]` to avoid giant Python object
+  payloads.
+- [x] Expose `items_per_block` in `BatchArray.info` and `ListArray.info` so the
+  internal block-size heuristic is visible when tuning compression/random access.
+- [x] Retune `BatchArray._guess_blocksize()` cache-budget tiers so default
+  `clevel=5` uses `L2 / 2` instead of L1-sized blocks, improving compression for
+  Arrow IPC payloads while keeping blocks smaller than full-batch `clevel=6+`
+  behavior.
+- [ ] Add regression tests for `items_per_block` appearing in `.info` output.
+- [ ] Add compression/lookup microbenchmarks for Arrow `ListArray` block-size
+  tuning on Chicago taxi-style list-struct payloads.
+
+### Phase 3 — named repeated field explode (future)
+
+- [ ] Add `ct.explode("events")` for named list fields if needed.
+- [ ] Expose element leaf columns and optional `_parent`, `_ordinal`.
+- [ ] Support `where`, aggregates, and sorting on exploded scalar leaves.
+
+### Phase 4 — parent predicates (future)
+
+- [ ] Add `where_any()` and `where_all()` for named repeated fields if there is user
+  demand. 
+- [ ] Map element masks back to parent masks using offsets/parent-index arrays. + +### Phase 5 — recursive repeated groups (future) + +- [ ] Consider recursively flattening nested repeated fields inside element structs. +- [ ] Example: `trip.path.londiff` in Chicago taxi. +- [ ] This requires nested row-space semantics and should be designed separately. + +--- + +## Profiling and tuning notes + +Recent profiling on: + +```bash +parquet-to-blosc2 chicago-taxi.parquet chicago-taxi.b2d \ + --overwrite --separate-nested-cols --max-rows 200_000 +``` + +showed that the old msgpack list serializer spends most of its time in the +list-column conversion path: + +- `CTable._write_arrow_batch()` dominated the import path. +- Inside that function, `arrow_col.to_pylist()` for the nested list column took + about 88% of the function time for the profiled Chicago taxi import. +- Fixed-width scalar leaves were already using the Arrow → NumPy path via + `_arrow_column_to_numpy()`, so the main Python-object materialization issue was + the nested `ListArray` column, not all columns. + +Using Arrow serialization for nested list columns avoids this conversion. This +is now the default for Parquet imports; pass `--list-serializer msgpack` only when +read-time PyArrow independence is more important than import speed: + +```bash +parquet-to-blosc2 chicago-taxi.parquet chicago-taxi.b2d \ + --overwrite --separate-nested-cols --max-rows 200_000 +``` + +Observed result on the 200k-row sample: + +- msgpack list serializer: about 6.1 s import time, 12.5 MB output. +- arrow list serializer: about 0.6 s import time, 14.7 MB output. + +Arrow-serialized `ListArray`/`BatchArray` payloads are still compressed by Blosc2 +as serialized byte payloads, so `BatchArray` keeps `typesize=1` by default. +Experiments with this Chicago taxi `trip.path` payload showed `typesize=1` was +also the best choice empirically. + +The more important tuning parameter was internal `items_per_block`. 
The old
+`clevel=5` heuristic used an L1-sized budget and produced small blocks (for this
+case, around 804 items/block), which compressed poorly. Retuning the heuristic
+to use `L2 / 2` for `clevel` 4–6 produced much larger but still sub-batch blocks
+(for this case, around 51k items/block), improving the `trip.path` cratio from
+about 4.95 to about 12.0 with only a small copy-time increase.
+
+Current `BatchArray._guess_blocksize()` policy:
+
+- `clevel` 1–3: L1 data-cache budget.
+- `clevel` 4–6: half the L2 cache budget.
+- `clevel` 7–8: full L2 cache budget.
+- `clevel` 9: full batch.
+
+Open follow-ups:
+
+- Add tests around the new `.info` fields and block-size heuristic.
+- Benchmark random lookup latency versus compression ratio for different
+  `items_per_block` values on Arrow list-struct payloads.
+- Keep the read-time PyArrow requirement for Arrow-serialized list columns documented
+  in the `CTable.from_parquet()` docstring and CLI `--list-serializer` help.
+
+---
+
+## Resolved design decisions
+
+1. Use the name `separate_nested_cols` for this behavior/API surface. It better
+   describes the general physical goal: nested fields become separate physical
+   CTable columns where possible.
+2. For qualifying schemas, unnamed-root list flattening is automatic by default:
+   - exactly one top-level field;
+   - field name is the canonical unnamed root `""`;
+   - field type is `list<struct<...>>` or `large_list<struct<...>>`.
+
+   Rationale: for these files, the outer list is a physical Parquet encoding
+   artifact rather than a meaningful user column. Separating the element struct
+   leaves produces a more natural CTable, improves analytics, and should usually
+   improve compression for scalar leaves because each leaf is compressed with its
+   own dtype/codec path. Users can opt out with `separate_nested_cols=False` or
+   `--no-separate-nested-cols` when closer fidelity to the original Parquet schema
+   is desired.
+3. 
Store provenance metadata by default, but do not store original root offsets
+   by default. Exact original Parquet root grouping is considered a low-priority
+   compatibility feature, not part of the normal CTable/Parquet interchange contract.
+4. `to_parquet()` should emit a clean logical nested table by default, e.g.
+   `trip: struct<...>`, `payment: struct<...>`, `company: ...`, not a re-wrapped
+   unnamed `list` with arbitrary grouping.
+5. Do not silently fall back to `ObjectArray` for unsupported nested children.
+   Raise by default; use `object_fallback=True` for explicit ObjectArray fallback.
+
+---
+
+## Current status and remaining work
+
+The first milestone is implemented: unnamed-root record stream flattening for one
+top-level `list<struct<...>>` column supports:
+
+```python
+ct = blosc2.CTable.from_parquet(
+    "chicago-taxi.parquet",
+    separate_nested_cols=True,
+)
+
+ct["payment.fare"].mean()
+ct.where("payment.fare > 20")
+ct["trip.begin.lon"].mean()
+```
+
+This is now the default for `CTable.from_parquet()` and `parquet-to-blosc2` for
+qualifying unnamed-root `list<struct<...>>` Parquet files. Pass
+`separate_nested_cols=False` in the library API, or `--no-separate-nested-cols`
+in the CLI, when preserving the original Parquet row/schema shape is more
+important than the separated column layout.
+
+Implemented beyond the original first milestone:
+
+- ordinary top-level structs flatten into dotted columns by default in the CLI;
+- `parquet-to-blosc2 --progress` is opt-in and reports ETA for unnamed-root
+  imports;
+- unnamed-root CLI imports write one flattened Parquet batch at a time, capped by
+  `MAX_ELEMENT_WRITE_BATCH`;
+- CLI summary output distinguishes unnamed-root row flattening from general
+  nested-column separation and reports serializer-aware batching choices;
+- Arrow is the default list serializer for Parquet imports, with msgpack still
+  available explicitly;
+- Arrow/msgpack use different default BatchArray sizes to match their memory
+  behavior. 
+ +Remaining work: + +- `ct.explode()` and parent/element mapping for named repeated fields; +- recursive flattening of nested repeated fields such as `trip.path.londiff`; +- tests and benchmarks for `.info` block-size fields, `items_per_block` tuning, + compression ratio, and random lookup latency. diff --git a/src/blosc2/__init__.py b/src/blosc2/__init__.py index de9ed022..8a587c06 100644 --- a/src/blosc2/__init__.py +++ b/src/blosc2/__init__.py @@ -18,13 +18,7 @@ import numpy as np -_HAS_NUMBA = False -try: - import numba - - _HAS_NUMBA = True -except ImportError: - pass +_HAS_NUMBA = importlib.util.find_spec("numba") is not None # Do the platform check once at module level IS_WASM = platform.machine() == "wasm32" # IS_WASM = True # for testing (comment this line out for production) @@ -736,10 +730,12 @@ def _raise(exc): where, ) from .schema import ( + DictionarySpec, bool, bytes, complex64, complex128, + dictionary, field, float32, float64, @@ -785,6 +781,8 @@ def _raise(exc): "bytes", "complex64", "complex128", + "dictionary", + "DictionarySpec", "field", "float32", "float64", diff --git a/src/blosc2/batch_array.py b/src/blosc2/batch_array.py index 8992f161..72c435a9 100644 --- a/src/blosc2/batch_array.py +++ b/src/blosc2/batch_array.py @@ -49,6 +49,8 @@ def __init__(self, parent: BatchArray, nbatch: int, lazybatch: bytes) -> None: self._items: list[Any] | None = None self._cached_block_index: int | None = None self._cached_block: list[Any] | None = None + self._cached_block_column_index: int | None = None + self._cached_block_column = None self._nbytes, self._cbytes, self._nblocks = blosc2.get_cbuffer_sizes(lazybatch) def _normalize_index(self, index: int) -> int: @@ -74,6 +76,17 @@ def _get_block(self, block_index: int) -> list[Any]: self._cached_block = block return block + def _get_block_item(self, block_index: int, item_index: int) -> Any: + if self._cached_block_index == block_index and self._cached_block is not None: + return 
self._cached_block[item_index] + if self._parent._serializer != "arrow": + return self._get_block(block_index)[item_index] + if self._cached_block_column_index != block_index or self._cached_block_column is None: + payload = self._parent.schunk.get_vlblock(self._nbatch, block_index) + self._cached_block_column = self._parent._deserialize_arrow_block_column(payload) + self._cached_block_column_index = block_index + return self._cached_block_column[item_index].as_py() + def __getitem__(self, index: int | slice) -> Any | list[Any]: if isinstance(index, slice): items = self._decode_items() @@ -87,9 +100,8 @@ def __getitem__(self, index: int | slice) -> Any | list[Any]: block_index, item_index = divmod(index, items_per_block) if block_index >= self._nblocks: raise IndexError("Batch index out of range") - block = self._get_block(block_index) try: - return block[item_index] + return self._get_block_item(block_index, item_index) except IndexError as exc: raise IndexError("Batch index out of range") from exc items = self._decode_items() @@ -593,11 +605,17 @@ def _guess_blocksize(self, payload_sizes: list[int]) -> int: if not payload_sizes: raise ValueError("BatchArray entries cannot be empty") clevel = self.cparams.clevel + # For serialized batch payloads, especially Arrow IPC, L1-sized blocks are often + # too small for codecs like Zstd to exploit cross-row redundancy. Use larger + # cache-budget tiers as clevel increases, while avoiding full L2 blocks at the + # default clevel to keep random access reasonably granular. 
if clevel == 9: return len(payload_sizes) - if 0 < clevel <= 5: + if 0 < clevel <= 3: budget = blosc2.cpu_info.get("l1_data_cache_size") - elif 5 < clevel < 9: + elif 3 < clevel <= 6: + budget = blosc2.cpu_info.get("l2_cache_size") // 2 + elif 6 < clevel < 9: budget = blosc2.cpu_info.get("l2_cache_size") else: return len(payload_sizes) @@ -625,9 +643,12 @@ def _serialize_msgpack_block(self, items: list[Any]) -> bytes: return payload def _serialize_arrow_block(self, items) -> bytes: - pa, _ = self._require_pyarrow() + pa, pa_ipc = self._require_pyarrow() batch = pa.record_batch([items], schema=self._get_arrow_schema()) - payload = batch.serialize().to_pybytes() + sink = pa.BufferOutputStream() + with pa_ipc.new_stream(sink, batch.schema) as writer: + writer.write_batch(batch) + payload = sink.getvalue().to_pybytes() _check_serialized_size(payload) return payload @@ -639,16 +660,34 @@ def _serialize_block(self, items: Any) -> bytes: def _deserialize_msgpack_block(self, payload: bytes) -> list[Any]: return msgpack_unpackb(payload) - def _deserialize_arrow_block(self, payload: bytes) -> list[Any]: + def _deserialize_arrow_block_column(self, payload: bytes): pa, pa_ipc = self._require_pyarrow() - batch = pa_ipc.read_record_batch(pa.BufferReader(payload), self._get_arrow_schema()) - return batch.column(0).to_pylist() + try: + reader = pa_ipc.open_stream(pa.BufferReader(payload)) + batch = reader.read_next_batch() + except (pa.ArrowInvalid, OSError): + # Backward compatibility for older arrow-serializer blocks written + # as bare serialized RecordBatch payloads. Those cannot represent + # dictionary batches reliably, so new blocks use IPC streams. 
+ batch = pa_ipc.read_record_batch(pa.BufferReader(payload), self._get_arrow_schema()) + return batch.column(0) + + def _deserialize_arrow_block(self, payload: bytes) -> list[Any]: + return self._deserialize_arrow_block_column(payload).to_pylist() def _deserialize_block(self, payload: bytes) -> list[Any]: if self._serializer == "arrow": return self._deserialize_arrow_block(payload) return self._deserialize_msgpack_block(payload) + def _deserialize_arrow_block_item(self, payload: bytes, item_index: int) -> Any: + return self._deserialize_arrow_block_column(payload)[item_index].as_py() + + def _deserialize_block_item(self, payload: bytes, item_index: int) -> Any: + if self._serializer == "arrow": + return self._deserialize_arrow_block_item(payload, item_index) + return self._deserialize_msgpack_block(payload)[item_index] + def _vl_cparams_kwargs(self) -> dict[str, Any]: return asdict(self.schunk.cparams) @@ -903,6 +942,7 @@ def info_items(self) -> list: return [ ("type", f"{self.__class__.__name__}"), ("serializer", self.serializer), + ("items_per_block", self.items_per_block), ("nbatches", nbatches_value), ("nblocks", nblocks_value), ("nitems", sum(batch_sizes)), diff --git a/src/blosc2/c2array.py b/src/blosc2/c2array.py index 9a849650..c662740d 100644 --- a/src/blosc2/c2array.py +++ b/src/blosc2/c2array.py @@ -15,7 +15,6 @@ from collections.abc import Sequence import numpy as np -import requests import blosc2 from blosc2.b2objects import encode_b2object_payload, make_b2object_carrier, write_b2object_payload @@ -31,6 +30,12 @@ """Default timeout for HTTP requests.""" +def _requests(): + import requests + + return requests + + @contextmanager def c2context( *, @@ -109,7 +114,7 @@ def _xget(url, params=None, headers=None, auth_token=None, timeout=TIMEOUT): if auth_token: headers = headers.copy() if headers else {} headers["Cookie"] = auth_token - response = requests.get(url, params=params, headers=headers, timeout=timeout) + response = _requests().get(url, 
params=params, headers=headers, timeout=timeout) response.raise_for_status() return response @@ -117,7 +122,7 @@ def _xget(url, params=None, headers=None, auth_token=None, timeout=TIMEOUT): def _xpost(url, json=None, auth_token=None, timeout=TIMEOUT): auth_token = auth_token or _subscriber_data["auth_token"] headers = {"Cookie": auth_token} if auth_token else None - response = requests.post(url, json=json, headers=headers, timeout=timeout) + response = _requests().post(url, json=json, headers=headers, timeout=timeout) response.raise_for_status() return response.json() @@ -132,7 +137,7 @@ def _sub_url(urlbase, path): def login(username, password, urlbase): url = _sub_url(urlbase, "auth/jwt/login") creds = {"username": username, "password": password} - resp = requests.post(url, data=creds, timeout=TIMEOUT) + resp = _requests().post(url, data=creds, timeout=TIMEOUT) resp.raise_for_status() return "=".join(list(resp.cookies.items())[0]) @@ -234,7 +239,7 @@ def __init__(self, path: str, /, urlbase: str | None = None, auth_token: str | N # Try to 'open' the remote path try: self.meta = info(self.path, self.urlbase, auth_token=self.auth_token) - except requests.HTTPError as err: + except _requests().HTTPError as err: raise FileNotFoundError(f"Remote path not found: {path}.\nError was: {err}") from err cparams = self.meta["schunk"]["cparams"] # Remove "filters, meta" from cparams; this is an artifact from the server diff --git a/src/blosc2/cli/parquet_to_blosc2.py b/src/blosc2/cli/parquet_to_blosc2.py index ba513c9b..8f5c2507 100644 --- a/src/blosc2/cli/parquet_to_blosc2.py +++ b/src/blosc2/cli/parquet_to_blosc2.py @@ -44,9 +44,10 @@ from typing import Any import blosc2 -from blosc2.schema_compiler import schema_to_dict +from blosc2.schema_compiler import _validate_column_name, schema_to_dict DEFAULT_BATCH_SIZE = 2048 +MAX_ELEMENT_WRITE_BATCH = 5_000_000 # cap on flattened elements yielded per write def require_pyarrow(): @@ -174,7 +175,14 @@ def build_parser() -> 
argparse.ArgumentParser: "--max-rows", type=int, default=None, - help="Maximum number of rows to import from the source parquet file; imports all rows by default.", + help=( + "Maximum number of CTable rows to import. " + "In normal mode this equals the number of Parquet rows read. " + "With separate nested columns enabled for an unnamed-root list> " + "file, the unit is list elements " + "(i.e. the number of rows in the resulting CTable), " + "not outer Parquet rows." + ), ) parser.add_argument( "--batch-size", @@ -186,7 +194,9 @@ def build_parser() -> argparse.ArgumentParser: "--blosc2-batch-size", type=int, default=None, - help="Rows grouped into each persisted BatchArray batch for imported Blosc2 varlen/list columns.", + help="Internal batch_rows for BatchArray/varlen columns in the imported CTable. " + "Defaults to the blocks value from blosc2.compute_chunks_blocks() based on " + "the estimated CTable row count.", ) parser.add_argument( "--blosc2-items-per-block", @@ -197,6 +207,16 @@ def build_parser() -> argparse.ArgumentParser: "Defaults to BatchArray's automatic heuristic." ), ) + parser.add_argument( + "--list-serializer", + choices=["msgpack", "arrow"], + default="arrow", + help=( + "Serializer for imported list columns. 'arrow' is the default and stores Arrow list " + "batches directly, which is much faster for deeply nested lists but requires PyArrow " + "when reading those columns later. Use 'msgpack' to avoid that read-time dependency." 
+ ), + ) parser.add_argument("--use-dict", action="store_true", help="Enable C-Blosc2 dictionary compression.") parser.add_argument( "--float-trunc-prec", @@ -236,7 +256,12 @@ def build_parser() -> argparse.ArgumentParser: "--batch-report-every", type=int, default=1, - help="Print progress every N batches; the final batch is always reported.", + help="With --progress, print progress every N batches; the final batch is always reported.", + ) + parser.add_argument( + "--progress", + action="store_true", + help="Print import progress lines. By default, only the import summary is shown.", ) parser.add_argument( "--profile", @@ -244,6 +269,29 @@ def build_parser() -> argparse.ArgumentParser: help="Run the selected operation under cProfile and print cumulative timing stats.", ) parser.add_argument("--overwrite", action="store_true") + parser.add_argument( + "--decode-dictionaries", + action="store_true", + help=( + "Decode Arrow dictionary-encoded columns to plain vlstring instead of preserving " + "the dictionary encoding. By default, supported dictionary columns " + "(string values with integer indices) are imported as Blosc2 dictionary columns." + ), + ) + parser.add_argument( + "--separate-nested-cols", + action=argparse.BooleanOptionalAction, + default=True, + dest="separate_nested_cols", + help=( + "Import nested columns as separate CTable columns where possible. " + "Top-level struct fields are flattened recursively into dotted leaf columns " + "(e.g. trip.begin.lon). For a single unnamed top-level list> " + "field (the Awkward Array / Chicago-taxi layout), flatten the outer list " + "so that each element becomes a CTable row. Enabled by default; use " + "--no-separate-nested-cols when closer Parquet schema fidelity is desired." 
+ ), + ) return parser @@ -272,11 +320,40 @@ def _release_arrow_temporaries(pa) -> None: pa.default_memory_pool().release_unused() +def ctable_column_name_map(schema) -> dict[str, str]: + """Return a mapping from Arrow field names to CTable-safe column names. + + Remaps invalid names (empty strings, names starting with '_', names + containing '/') to safe substitutes like ``column_0``. + """ + used: set[str] = set() + result: dict[str, str] = {} + for i, field in enumerate(schema): + original = field.name + try: + _validate_column_name(original) + candidate = original + except ValueError: + candidate = f"column_{i}" + if candidate in used: + base = candidate + suffix = 1 + while f"{base}_{suffix}" in used: + suffix += 1 + candidate = f"{base}_{suffix}" + used.add(candidate) + result[original] = candidate + return result + + def classify_columns( # noqa: C901 pa, schema, fixed_string_lengths: dict[str, int] | None = None, fixed_bytes_lengths: dict[str, int] | None = None, + *, + decode_dictionaries: bool = False, + separate_nested_cols: bool = True, ): """Classify Parquet schema columns into importable categories.""" fixed_cols: dict[str, object] = {} @@ -289,8 +366,14 @@ def classify_columns( # noqa: C901 for field in schema: t = field.type if pa.types.is_struct(t): - struct_wrap_cols[field.name] = pa.list_(t) - conversions[field.name] = {"conversion": "struct_wrapped_as_singleton_list"} + if separate_nested_cols: + # Let CTable.from_arrow() apply its normal struct flattening so + # top-level structs become dotted leaf columns. 
+ fixed_cols[field.name] = field + conversions[field.name] = {"conversion": "struct_flattened_to_columns"} + else: + struct_wrap_cols[field.name] = pa.list_(t) + conversions[field.name] = {"conversion": "struct_wrapped_as_singleton_list"} continue if pa.types.is_list(t) or pa.types.is_large_list(t): value_type = t.value_type @@ -299,6 +382,30 @@ def classify_columns( # noqa: C901 else: fixed_cols[field.name] = field continue + if pa.types.is_dictionary(t): + vt = t.value_type + if vt in (pa.string(), pa.large_string(), pa.utf8(), pa.large_utf8()): + if decode_dictionaries: + # Decode to plain vlstring. + fixed_cols[field.name] = pa.field( + field.name, pa.string(), nullable=field.nullable, metadata=field.metadata + ) + conversions[field.name] = { + "conversion": "dictionary_decoded_to_vlstring", + "ordered": bool(t.ordered), + } + else: + fixed_cols[field.name] = field + conversions[field.name] = { + "conversion": "dictionary_preserved", + "ordered": bool(t.ordered), + } + else: + conversions[field.name] = { + "conversion": "skipped", + "reason": f"unsupported dictionary value type: {vt}", + } + continue if pa.types.is_boolean(t): fixed_cols[field.name] = field if field.nullable: @@ -352,21 +459,41 @@ def build_import_schema( fixed_cols: dict, struct_wrap_cols: dict, timestamp_units: dict[str, str] | None = None, + column_name_map: dict[str, str] | None = None, ): """Build the Arrow schema passed to CTable.from_arrow().""" timestamp_units = timestamp_units or {} + column_name_map = column_name_map or {} fields = [] for field in original_schema: + ctable_name = column_name_map.get(field.name, field.name) if field.name in struct_wrap_cols: - fields.append(pa.field(field.name, struct_wrap_cols[field.name], nullable=True)) + fields.append(pa.field(ctable_name, struct_wrap_cols[field.name], nullable=True)) elif field.name in fixed_cols: unit = timestamp_units.get(field.name) if unit is not None: fields.append( - pa.field(field.name, pa.timestamp(unit, tz=field.type.tz), 
nullable=field.nullable) + pa.field(ctable_name, pa.timestamp(unit, tz=field.type.tz), nullable=field.nullable) ) else: - fields.append(field) + # Use the field from fixed_cols in case it was remapped (e.g. dict→string) + fc = fixed_cols[field.name] + if hasattr(fc, "type") and fc.type != field.type: + # fc has the remapped type; use ctable_name for the field name + fields.append( + pa.field( + ctable_name, + fc.type, + nullable=fc.nullable, + metadata=fc.metadata if fc.metadata else None, + ) + ) + elif ctable_name != field.name: + fields.append( + pa.field(ctable_name, field.type, nullable=field.nullable, metadata=field.metadata) + ) + else: + fields.append(field) return pa.schema(fields) @@ -614,11 +741,14 @@ def scan_string_and_bytes_lengths(pa, pf, args, schema) -> tuple[dict[str, int], def transform_batch( - pa, batch, selected_cols: list[str], struct_wrap_cols: dict, timestamp_units: dict[str, str] + pa, + batch, + selected_cols: list[str], + struct_wrap_cols: dict, + timestamp_units: dict[str, str], + import_schema=None, ): """Apply import-time Arrow conversions; pass everything else through.""" - if not struct_wrap_cols and not timestamp_units: - return batch arrays = list(batch.columns) for name, unit in timestamp_units.items(): idx = batch.schema.get_field_index(name) @@ -636,18 +766,34 @@ def transform_batch( continue arr = batch.column(idx) arrays[idx] = pa.array([[v] if v is not None else None for v in arr.to_pylist()], type=target_type) + if import_schema is not None: + # Cast / rename arrays to match import_schema (e.g. dict→string, renamed columns). 
+ for i, field in enumerate(import_schema): + if not arrays[i].type.equals(field.type): + arrays[i] = arrays[i].cast(field.type, safe=True) + return pa.record_batch(arrays, schema=import_schema) + if not struct_wrap_cols and not timestamp_units: + return batch return pa.record_batch(arrays, names=selected_cols) -def store_original_arrow_metadata(ct, original_schema, imported_schema, conversions: dict) -> None: +def store_original_arrow_metadata( + ct, original_schema, imported_schema, conversions: dict, column_name_map: dict | None = None +) -> None: + column_name_map = column_name_map or {} fields_meta = {} for field in original_schema: entry = conversions.get(field.name) if entry is None: continue entry = dict(entry) + ctable_name = column_name_map.get(field.name, field.name) + if ctable_name != field.name: + entry["ctable_name"] = ctable_name entry["original_arrow_type"] = str(field.type) - if field.name in imported_schema.names: + if ctable_name in imported_schema.names: + entry["ctable_arrow_type"] = str(imported_schema.field(ctable_name).type) + elif field.name in imported_schema.names: entry["ctable_arrow_type"] = str(imported_schema.field(field.name).type) fields_meta[field.name] = entry ct._schema.metadata = { @@ -692,6 +838,13 @@ def print_import_plan( fixed_bytes_cols = [ n for n, e in conversions.items() if e.get("conversion") in {"fixed_bytes", "fixed_bytes_nullable"} ] + dict_cols = [n for n, e in conversions.items() if e.get("conversion") == "dictionary_preserved"] + dict_decoded_cols = [ + n for n, e in conversions.items() if e.get("conversion") == "dictionary_decoded_to_vlstring" + ] + flattened_structs = [ + n for n, e in conversions.items() if e.get("conversion") == "struct_flattened_to_columns" + ] wrapped_structs = list(struct_wrap_cols) skipped = {n: e for n, e in conversions.items() if e.get("conversion") == "skipped"} print(f"Input: {input_path} ({input_path.stat().st_size / 1e6:.1f} MB)") @@ -699,14 +852,21 @@ def print_import_plan( 
print(f"CTable store: {ctable_store_kind(output_path)}") print(f"Rows: {pf.metadata.num_rows:,}") if args.max_rows is not None: - print(f"Rows to import: {min(args.max_rows, pf.metadata.num_rows):,}") + print(f"Rows to import: {min(args.max_rows, pf.metadata.num_rows):,} (Parquet rows)") print(f"Parquet columns: {len(parquet_schema)}") print(f"Imported columns: {len(fixed_cols) + len(struct_wrap_cols)}") - print(f" Fixed-width: {len(fixed_cols) - len(vlstring_cols) - len(vlbytes_cols)}") + n_fixed_non_string = ( + len(fixed_cols) - len(vlstring_cols) - len(vlbytes_cols) - len(dict_cols) - len(dict_decoded_cols) + ) + print(f" Fixed-width: {n_fixed_non_string}") print(f" Fixed strings: {len(fixed_string_cols)}") print(f" Fixed bytes: {len(fixed_bytes_cols)}") print(f" vlstring: {len(vlstring_cols)}") print(f" vlbytes: {len(vlbytes_cols)}") + print(f" Dictionary: {len(dict_cols)}") + if dict_decoded_cols: + print(f" Dict→vlstring: {len(dict_decoded_cols)}") + print(f" Struct→columns: {len(flattened_structs)}") print(f" Struct→list: {len(wrapped_structs)}") print(f" Nullable scalars: {len(nullable_scalars)}") print(f" Skipped unsupported: {len(skipped)}") @@ -720,6 +880,7 @@ def print_import_plan( print(f"Blosc2 batch size: {args.blosc2_batch_size:,}") if args.blosc2_items_per_block is not None: print(f"Blosc2 items/block: {args.blosc2_items_per_block:,}") + print(f"List serializer: {args.list_serializer}") print(f"Codec / level: {args.codec} / {args.clevel}") print(f"Use dict: {args.use_dict}") trunc_global = getattr(args, "float_trunc_prec_global", None) @@ -734,7 +895,7 @@ def print_import_plan( print() -def progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_units): +def progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_units, import_schema=None): rows_done = 0 t0 = time.perf_counter() total = pf.metadata.num_rows if args.max_rows is None else min(args.max_rows, pf.metadata.num_rows) @@ -749,14 +910,16 @@ def 
progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_un report_batch_mem = args.mem_report and batch_n % args.mem_every == 0 if report_batch_mem: memory_report(f"batch {batch_n} after parquet read", pa) - batch = transform_batch(pa, raw_batch, selected_cols, struct_wrap_cols, timestamp_units) + batch = transform_batch( + pa, raw_batch, selected_cols, struct_wrap_cols, timestamp_units, import_schema + ) if report_batch_mem: memory_report(f"batch {batch_n} after transform", pa) rows_done += len(batch) elapsed = time.perf_counter() - t0 rate = rows_done / elapsed if elapsed > 0 else 0.0 eta = (total - rows_done) / rate if rate > 0 else 0.0 - if batch_n % args.batch_report_every == 0 or rows_done >= total: + if args.progress and (batch_n % args.batch_report_every == 0 or rows_done >= total): print( f" batch {batch_n:4d} {rows_done:>12,}/{total:,} " f"{elapsed:7.1f}s {rate / 1e3:7.1f}k rows/s ETA {eta:6.0f}s", @@ -769,10 +932,246 @@ def progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_un memory_report(f"batch {batch_n} after ctable write", pa) +def _flatten_root_batches_with_progress( + pa, + pf, + inner_schema, + args, + capacity_hint=None, +): + """Yield flattened :class:`pyarrow.RecordBatch` objects from an unnamed-root Parquet file. + + Reads Parquet batches, flattens the outer ``list>`` column via + ``ListArray.flatten()``, and honours ``args.max_rows`` as an element-level + row limit. When ``args.progress`` is enabled, progress is printed per + Parquet batch according to ``args.batch_report_every``. + + Each flattened Parquet batch is yielded as a single write to CTable so that + the per-write Python/Arrow overhead is amortised over as many rows as + possible. Batches exceeding ``MAX_ELEMENT_WRITE_BATCH`` are split into + cap-sized chunks to bound memory usage. + """ + rows_done = 0 + max_rows = args.max_rows + t0 = time.perf_counter() + # total_str is the CTable-row (element) limit for the progress display. 
+ total_str = f"{max_rows:,} CTable rows" if max_rows is not None else "?" + # Use capacity_hint as the estimated total for ETA when max_rows is not set. + estimated_total = max_rows if max_rows is not None else capacity_hint + + for parquet_batch_n, raw_batch in enumerate( + pf.iter_batches(batch_size=args.parquet_batch_size), start=1 + ): + if max_rows is not None and rows_done >= max_rows: + break + + report_batch_mem = args.mem_report and parquet_batch_n % args.mem_every == 0 + if report_batch_mem: + memory_report(f"batch {parquet_batch_n} after parquet read", pa) + + list_array = raw_batch.column(0) + struct_values = list_array.flatten() # skips null outer-list rows + + if len(struct_values) == 0: + continue + + if max_rows is not None: + remaining = max_rows - rows_done + if len(struct_values) > remaining: + struct_values = struct_values.slice(0, remaining) + + # Yield the whole flattened batch as one write; split only when it + # exceeds MAX_ELEMENT_WRITE_BATCH to bound peak memory. 
+ n_elems = len(struct_values) + + elapsed = time.perf_counter() - t0 + rate = rows_done / elapsed if elapsed > 0 and rows_done > 0 else 0.0 + eta_str = ( + f" ETA {(estimated_total - rows_done) / rate:6.0f}s" + if rate > 0 and estimated_total is not None + else "" + ) + report_progress = parquet_batch_n % args.batch_report_every == 0 or ( + max_rows is not None and rows_done + n_elems >= max_rows + ) + n_writes = (n_elems + MAX_ELEMENT_WRITE_BATCH - 1) // MAX_ELEMENT_WRITE_BATCH + if args.progress and report_progress: + print( + f" parquet batch {parquet_batch_n:4d}: " + f"{n_elems:>12,} CTable rows -> {n_writes:,} write(s) " + f"done {rows_done:>12,}/{total_str} " + f"{elapsed:7.1f}s {rate / 1e3:7.1f}k rows/s{eta_str}", + flush=True, + ) + + for offset in range(0, n_elems, MAX_ELEMENT_WRITE_BATCH): + chunk = struct_values.slice(offset, min(MAX_ELEMENT_WRITE_BATCH, n_elems - offset)) + sub_batch = pa.RecordBatch.from_struct_array(chunk) + rows_done += len(sub_batch) + yield sub_batch + + if report_batch_mem: + memory_report(f"batch {parquet_batch_n} after flatten+write", pa) + + if max_rows is not None and rows_done >= max_rows: + break + + +def import_unnamed_root_separate_cols( + args, + input_path: Path, + output_path: Path, + pa, + pf, + parquet_schema, +) -> list[str]: + """Import an unnamed-root ``list>`` Parquet file with nested column separation. + + Each element of the unnamed root list becomes a CTable row. Struct leaves + are stored as separate physical columns with dotted logical paths such as + ``trip.begin.lon`` and ``payment.fare``. + + Returns the list of imported CTable column names. + """ + from blosc2.schema_compiler import schema_to_dict + + inner_schema = blosc2.CTable._inner_schema_for_unnamed_root(pa, parquet_schema) + total_parquet_rows = pf.metadata.num_rows if pf.metadata is not None else None + + # ------------------------------------------------------------------ + # Estimate total element count by sampling the first Parquet batch. 
+ # This is used as capacity_hint so that compute_chunks_blocks() picks + # chunk/block sizes proportional to the actual data volume rather than + # defaulting to (1, 1) when the element count is unknown. + # pf.iter_batches() creates a fresh iterator each call, so sampling + # here does not affect the import iterator created later. + # ------------------------------------------------------------------ + capacity_hint = None + estimated_batch_rows = None + if total_parquet_rows is not None and total_parquet_rows > 0: + try: + sample = next( + pf.iter_batches(batch_size=min(args.parquet_batch_size, total_parquet_rows)), + None, + ) + if sample is not None and len(sample) > 0: + n_outer_sampled = len(sample) + n_elems_sampled = len(sample.column(0).flatten()) + avg_per_outer_row = n_elems_sampled / n_outer_sampled + estimated_batch_rows = max(1, round(args.parquet_batch_size * avg_per_outer_row)) + estimate = round(total_parquet_rows * avg_per_outer_row) + if args.max_rows is not None: + estimate = min(estimate, args.max_rows) + capacity_hint = max(1, estimate) + except Exception: + pass # sampling failure is non-fatal; from_arrow falls back to _EXPECTED_SIZE_DEFAULT + + if args.blosc2_batch_size is None: + if args.list_serializer == "arrow": + # Arrow list storage appends incoming Arrow chunks directly, without + # materializing Python nested-list objects. Use the natural flattened + # Parquet-batch scale (about 1M rows for Chicago taxi), capped only for + # pathological batches, so the displayed BatchArray size matches the + # actual write granularity better than the absolute cap would. + args.blosc2_batch_size = min( + MAX_ELEMENT_WRITE_BATCH, + estimated_batch_rows if estimated_batch_rows is not None else MAX_ELEMENT_WRITE_BATCH, + ) + else: + # Msgpack list storage materializes nested Arrow list data as Python objects + # before serializing. 
Keep its internal BatchArray batch_rows at Blosc2's + # cache-tuned block granularity instead of the larger Arrow write scale. + if capacity_hint is not None: + _, blocks = blosc2.compute_chunks_blocks((capacity_hint,)) + args.blosc2_batch_size = max(1, blocks[0]) + else: + args.blosc2_batch_size = DEFAULT_BATCH_SIZE + + print(f"Input: {input_path} ({input_path.stat().st_size / 1e6:.1f} MB)") + print(f"Output: {output_path}") + print(f"CTable store: {ctable_store_kind(output_path)}") + print("Mode: unnamed-root list flattening") + print("Nested columns: separated into dotted CTable columns") + if total_parquet_rows is not None: + print(f"Parquet rows: {total_parquet_rows:,}") + if capacity_hint is not None: + print(f"Est. CTable rows: ~{capacity_hint:,}") + n_inner = len(inner_schema) + print(f"Inner struct fields: {n_inner}") + for f in inner_schema: + print(f" {f.name}: {f.type}") + if args.max_rows is not None: + print(f"Max CTable rows: {args.max_rows:,} (list elements)") + print(f"Parquet batch size: {args.parquet_batch_size:,} outer rows") + blosc2_batch_note = ( + f"auto, max: {MAX_ELEMENT_WRITE_BATCH:,}" + if getattr(args, "blosc2_batch_size_auto", False) + else f"max: {MAX_ELEMENT_WRITE_BATCH:,}" + ) + print(f"Blosc2 batch size: {args.blosc2_batch_size:,} BatchArray rows ({blosc2_batch_note})") + if args.blosc2_items_per_block is not None: + print(f"Blosc2 items/block: {args.blosc2_items_per_block:,}") + print(f"List serializer: {args.list_serializer}") + print(f"Codec / level: {args.codec} / {args.clevel}") + print(f"Use dict: {args.use_dict}") + print() + + cparams = blosc2.CParams(codec=blosc2.Codec[args.codec], clevel=args.clevel, use_dict=args.use_dict) + t0 = time.perf_counter() + maybe_memory_report(args, "before CTable import", pa) + + ct = blosc2.CTable.from_arrow( + inner_schema, + _flatten_root_batches_with_progress(pa, pf, inner_schema, args, capacity_hint=capacity_hint), + urlpath=str(output_path), + mode="w", + cparams=cparams, + 
capacity_hint=capacity_hint, + auto_null_sentinels=True, + blosc2_batch_size=args.blosc2_batch_size, + blosc2_items_per_block=args.blosc2_items_per_block, + list_serializer=args.list_serializer, + ) + + maybe_memory_report(args, "after CTable import", pa) + + # Store the original_root provenance metadata so that reopened CTables know + # they came from an unnamed-root list> file. + nested_meta = ct._schema.metadata.get("nested", {}) + nested_meta["original_root"] = { + "kind": "unnamed_list_struct", + "field_name": "", + "preserve_grouping": False, + } + ct._schema.metadata["nested"] = nested_meta + ct._storage.save_schema(schema_to_dict(ct._schema)) + + maybe_memory_report(args, "after metadata save", pa) + + elapsed = time.perf_counter() - t0 + rows = len(ct) + cols = len(ct.col_names) + col_names = list(ct.col_names) + ct.close() + + maybe_memory_report(args, "after CTable close", pa) + + output_size = ( + output_path.stat().st_size + if output_path.is_file() + else sum(f.stat().st_size for f in output_path.rglob("*") if f.is_file()) + ) + print(f"Done in {elapsed:.2f}s") + print(f"Element rows imported: {rows:,}") + print(f"Columns imported: {cols}") + print(f"Output size: {output_size / 1e6:.1f} MB") + return col_names + + def import_parquet_to_ctable(args, input_path: Path, output_path: Path): if args.parquet_batch_size <= 0: raise ValueError("--parquet-batch-size must be positive") - if args.blosc2_batch_size <= 0: + if args.blosc2_batch_size is not None and args.blosc2_batch_size <= 0: raise ValueError("--blosc2-batch-size must be positive") if args.blosc2_items_per_block is not None and args.blosc2_items_per_block <= 0: raise ValueError("--blosc2-items-per-block must be positive") @@ -799,6 +1198,14 @@ def import_parquet_to_ctable(args, input_path: Path, output_path: Path): maybe_memory_report(args, "after ParquetFile open", pa) parquet_schema = pf.schema_arrow + # ------------------------------------------------------------------ + # Early dispatch: 
--separate-nested-cols for unnamed-root datasets + # ------------------------------------------------------------------ + if getattr(args, "separate_nested_cols", False) and blosc2.CTable._detect_unnamed_root_list_struct( + pa, parquet_schema + ): + return import_unnamed_root_separate_cols(args, input_path, output_path, pa, pf, parquet_schema) + fixed_string_lengths, fixed_bytes_lengths = scan_string_and_bytes_lengths(pa, pf, args, parquet_schema) maybe_memory_report(args, "after string/binary length scan", pa) @@ -806,13 +1213,24 @@ def import_parquet_to_ctable(args, input_path: Path, output_path: Path): maybe_memory_report(args, "after timestamp unit scan", pa) fixed_cols, struct_wrap_cols, conversions, nullable_scalars = classify_columns( - pa, parquet_schema, fixed_string_lengths, fixed_bytes_lengths + pa, + parquet_schema, + fixed_string_lengths, + fixed_bytes_lengths, + decode_dictionaries=getattr(args, "decode_dictionaries", False), + separate_nested_cols=getattr(args, "separate_nested_cols", True), ) maybe_memory_report(args, "after column classification", pa) selected_cols = [f.name for f in parquet_schema if f.name in fixed_cols or f.name in struct_wrap_cols] - import_schema = build_import_schema(pa, parquet_schema, fixed_cols, struct_wrap_cols, timestamp_units) - fixed_scalar_lengths = {**fixed_string_lengths, **fixed_bytes_lengths} or None + column_name_map = ctable_column_name_map(parquet_schema) + import_schema = build_import_schema( + pa, parquet_schema, fixed_cols, struct_wrap_cols, timestamp_units, column_name_map + ) + fixed_scalar_lengths = { + column_name_map.get(name, name): length + for name, length in {**fixed_string_lengths, **fixed_bytes_lengths}.items() + } or None float_trunc_column_cparams = build_float_trunc_column_cparams(pa, import_schema, args) maybe_memory_report(args, "after import schema build", pa) @@ -833,7 +1251,7 @@ def import_parquet_to_ctable(args, input_path: Path, output_path: Path): ct = blosc2.CTable.from_arrow( 
import_schema, - progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_units), + progress_batches(pa, pf, args, selected_cols, struct_wrap_cols, timestamp_units, import_schema), urlpath=str(output_path), mode="w", cparams=blosc2.CParams(codec=blosc2.Codec[args.codec], clevel=args.clevel, use_dict=args.use_dict), @@ -844,10 +1262,11 @@ def import_parquet_to_ctable(args, input_path: Path, output_path: Path): auto_null_sentinels=True, blosc2_batch_size=args.blosc2_batch_size, blosc2_items_per_block=args.blosc2_items_per_block, + list_serializer=args.list_serializer, column_cparams=float_trunc_column_cparams or None, ) maybe_memory_report(args, "after CTable import", pa) - store_original_arrow_metadata(ct, parquet_schema, import_schema, conversions) + store_original_arrow_metadata(ct, parquet_schema, import_schema, conversions, column_name_map) maybe_memory_report(args, "after metadata save", pa) elapsed = time.perf_counter() - t0 rows = len(ct) @@ -921,6 +1340,22 @@ def export_ctable_to_parquet(input_path: Path, output_path: Path, *, batch_size: elif conversion in {"vlstring", "vlstring_nullable", "vlbytes", "vlbytes_nullable"}: if str(arr.type) != str(field.type): arr = arr.cast(field.type) + elif conversion in {"dictionary_preserved"}: + # CTable emits dictionary; restore original type if needed. + if str(arr.type) != str(field.type): + arr = arr.cast(field.type, safe=True) + elif conversion in {"dictionary_decoded_to_vlstring"}: + # Was decoded to vlstring on import; restore as dictionary type on export. 
+ if pa.types.is_dictionary(field.type): + encoded = pa.DictionaryArray.from_arrays( + *pa.array(arr.to_pylist()) + .dictionary_encode() + .unify_dictionaries([pa.array(arr.to_pylist()).dictionary_encode()]), + ordered=field.type.ordered, + ) + arr = encoded.cast(field.type) + elif str(arr.type) != str(field.type): + arr = arr.cast(field.type) elif str(arr.type) != str(field.type): arr = pa.array(arr.to_pylist(), type=field.type) arrays.append(arr) @@ -1060,7 +1495,33 @@ def average_parquet_row_group_size(input_path: Path) -> int | None: return max(1, round(metadata.num_rows / metadata.num_row_groups)) +def is_unnamed_root_parquet_input(input_path: Path) -> bool: + if input_path.suffix != ".parquet" or not input_path.exists(): + return False + try: + pa, pq = require_pyarrow() + pf = pq.ParquetFile(input_path) + return blosc2.CTable._detect_unnamed_root_list_struct(pa, pf.schema_arrow) + except Exception: + return False + + def resolve_default_batch_sizes(args, *, parquet_specified: bool, blosc2_specified: bool) -> None: + if getattr(args, "separate_nested_cols", False) and is_unnamed_root_parquet_input(args.input_path): + # In separate-nested mode the two batch-size options use different units: + # Parquet batches are outer rows, while Blosc2 batches are flattened + # CTable rows. Keep them independent so a large write batch does not + # accidentally imply a huge Parquet read batch (and vice versa). + if not parquet_specified: + args.parquet_batch_size = average_parquet_row_group_size(args.input_path) or DEFAULT_BATCH_SIZE + if not blosc2_specified: + # Defer separate-nested defaults until import, where we have a sampled + # estimate of flattened CTable rows per Parquet batch. Arrow uses that + # natural per-Parquet-batch scale; msgpack uses a smaller blocks-based + # scale because it materializes nested Python objects before serializing. 
+ args.blosc2_batch_size = None + return + if parquet_specified and not blosc2_specified: args.blosc2_batch_size = args.parquet_batch_size elif blosc2_specified and not parquet_specified: @@ -1079,6 +1540,7 @@ def main(argv: list[str] | None = None) -> int: argv, "--batch-size" ) blosc2_specified = _option_present(argv, "--blosc2-batch-size") + args.blosc2_batch_size_auto = not blosc2_specified resolve_default_batch_sizes(args, parquet_specified=parquet_specified, blosc2_specified=blosc2_specified) if args.profile: diff --git a/src/blosc2/core.py b/src/blosc2/core.py index 4cb022c0..a00e1725 100644 --- a/src/blosc2/core.py +++ b/src/blosc2/core.py @@ -24,7 +24,6 @@ from typing import TYPE_CHECKING, ClassVar import numpy as np -import requests import blosc2 from blosc2 import blosc2_ext @@ -1138,6 +1137,8 @@ def print_versions(): import numexpr print(f"numexpr version: {numexpr.__version__}") + import requests + print(f"requests version: {requests.__version__}") print(f"Python version: {sys.version}") (sysname, _nodename, release, version, machine, processor) = platform.uname() diff --git a/src/blosc2/ctable.py b/src/blosc2/ctable.py index 0c3cdd26..1f80dc3d 100644 --- a/src/blosc2/ctable.py +++ b/src/blosc2/ctable.py @@ -15,7 +15,6 @@ import contextvars import copy import dataclasses -import itertools import os import pprint import re @@ -25,17 +24,29 @@ from dataclasses import MISSING, dataclass from dataclasses import field as dataclass_field from textwrap import TextWrapper -from typing import Any, Generic, Literal, TypeVar +from typing import TYPE_CHECKING, Any, Generic, Literal, TypeVar import numpy as np import blosc2 from blosc2 import compute_chunks_blocks -from blosc2.ctable_storage import FileTableStorage, InMemoryTableStorage, TableStorage, TreeStoreTableStorage +from blosc2.ctable_storage import ( + FileTableStorage, + InMemoryTableStorage, + TableStorage, + TreeStoreTableStorage, + _column_name_to_relpath, + join_field_path, + split_field_path, +) from 
blosc2.info import InfoReporter, format_nbytes_info from blosc2.list_array import ListArray, coerce_list_cell from blosc2.scalar_array import _ScalarVarLenArray + +if TYPE_CHECKING: + from blosc2.dictionary_column import DictionaryColumn from blosc2.schema import ( + DictionarySpec, ListSpec, ObjectSpec, SchemaSpec, @@ -249,6 +260,9 @@ def __getitem__(self, key): class _CTableInfoReporter(InfoReporter): """Info reporter that also preserves the historic ``t.info()`` call style.""" + def __len__(self) -> int: + return len(self.obj.info_items) + def __repr__(self) -> str: items = self.obj.info_items max_key_len = max(len(k) for k, _ in items) @@ -546,6 +560,12 @@ def is_varlen_scalar(self) -> bool: col = self._table._schema.columns_by_name.get(self._col_name) return col is not None and isinstance(col.spec, (VLStringSpec, VLBytesSpec, StructSpec, ObjectSpec)) + @property + def is_dictionary(self) -> bool: + """True if this column is a dictionary-encoded string column.""" + col = self._table._schema.columns_by_name.get(self._col_name) + return col is not None and isinstance(col.spec, DictionarySpec) + @property def _valid_rows(self): if self._mask is None: @@ -580,20 +600,25 @@ def _values_from_key(self, key): # noqa: C901 if not (0 <= key < n_rows): raise IndexError(f"index {key} is out of bounds for column with size {n_rows}") pos_true = _find_physical_index(self._valid_rows, key) + if self.is_dictionary: + return self._raw_col[int(pos_true)] return self._maybe_decode_timestamp_values(self._raw_col[int(pos_true)]) elif isinstance(key, slice): - valid = self._valid_rows - real_pos = blosc2.where(valid, _arange(len(valid))).compute() + real_pos = np.where(self._valid_rows[:])[0] start, stop, step = key.indices(len(real_pos)) if start >= stop: - return [] if (self.is_list or self.is_varlen_scalar) else np.array([], dtype=self.dtype) + return ( + [] + if (self.is_list or self.is_varlen_scalar or self.is_dictionary) + else np.array([], dtype=self.dtype) + ) selected_pos = 
real_pos[start:stop:step] # physical row positions if self.is_computed: lo, hi = int(selected_pos.min()), int(selected_pos.max()) chunk = np.asarray(self._raw_col[lo : hi + 1]) return chunk[selected_pos - lo] - if self.is_list or self.is_varlen_scalar: + if self.is_list or self.is_varlen_scalar or self.is_dictionary: return self._raw_col[selected_pos] return self._maybe_decode_timestamp_values(np.asarray(self._raw_col[selected_pos])) @@ -608,17 +633,17 @@ def _values_from_key(self, key): # noqa: C901 if self.is_computed: raw_np = np.asarray(self._raw_col[:]) return raw_np[phys_indices] - if self.is_list or self.is_varlen_scalar: + if self.is_list or self.is_varlen_scalar or self.is_dictionary: return self._raw_col[phys_indices] return self._maybe_decode_timestamp_values(self._raw_col[phys_indices]) elif isinstance(key, (list, tuple, np.ndarray)): - real_pos = blosc2.where(self._valid_rows, _arange(len(self._valid_rows))).compute() + real_pos = np.where(self._valid_rows[:])[0] phys_indices = np.array([real_pos[i] for i in key], dtype=np.int64) if self.is_computed: raw_np = np.asarray(self._raw_col[:]) return raw_np[phys_indices] - if self.is_list or self.is_varlen_scalar: + if self.is_list or self.is_varlen_scalar or self.is_dictionary: return self._raw_col[phys_indices] return self._maybe_decode_timestamp_values(self._raw_col[phys_indices]) @@ -773,18 +798,39 @@ def __iter__(self): yield from data_chunk[mask_chunk] def __repr__(self) -> str: - preview_items = [] - for value in itertools.islice(self, self._REPR_PREVIEW_ITEMS + 1): - if isinstance(value, np.generic): - value = value.item() - preview_items.append(repr(value)) - - truncated = len(preview_items) > self._REPR_PREVIEW_ITEMS + preview_len = self._REPR_PREVIEW_ITEMS + 1 + if self.is_list: + label = self._table._dtype_info_label( + self.dtype, self._table._schema.columns_by_name[self._col_name].spec + ) + preview_values = [f"<{label}>"] * min(len(self), preview_len) + else: + preview_pos = 
np.where(self._valid_rows[:])[0][:preview_len] + if self.is_dictionary or self.is_varlen_scalar: + preview_values = self._raw_col[preview_pos] + elif len(preview_pos) == 0: + preview_values = [] + else: + preview_values = self._maybe_decode_timestamp_values(self._raw_col[preview_pos]).tolist() + truncated = len(preview_values) > self._REPR_PREVIEW_ITEMS if truncated: - preview_items = preview_items[: self._REPR_PREVIEW_ITEMS] - preview_items.append("...") + preview_values = preview_values[: self._REPR_PREVIEW_ITEMS] + + if self.dtype is not None and self.dtype.kind in "biufc" and preview_values: + arr = np.asarray(preview_values, dtype=self.dtype) + preview = np.array2string(arr, separator=", ", max_line_width=10_000)[1:-1] + if truncated: + preview = f"{preview}, ..." if preview else "..." + else: + preview_items = [] + for value in preview_values: + if isinstance(value, np.generic): + value = value.item() + preview_items.append(repr(value)) + if truncated: + preview_items.append("...") + preview = ", ".join(preview_items) - preview = ", ".join(preview_items) return f"Column({self._col_name!r}, dtype={self.dtype}, len={len(self)}, values=[{preview}])" def __len__(self): @@ -796,6 +842,76 @@ def shape(self) -> tuple[int]: """Logical shape of the live column values.""" return (len(self),) + @property + def info(self) -> _CTableInfoReporter: + """Get information about this column. + + The report includes both logical/live-row details and, when available, + the physical storage details used internally by lazy predicates. 
+ + Examples + -------- + >>> print(t["score"].info) + >>> t["score"].info() + """ + return _CTableInfoReporter(self) + + @property + def info_items(self) -> list[tuple[str, object]]: + """Structured summary items used by :attr:`info`.""" + raw = self._raw_col + table = self._table + col_meta = table._schema.columns_by_name.get(self._col_name) + spec = col_meta.spec if col_meta is not None else None + physical_len = len(raw) if hasattr(raw, "__len__") else None + items: list[tuple[str, object]] = [ + ("type", self.__class__.__name__), + ("name", self._col_name), + ("logical_length", len(self)), + ("physical_length", physical_len), + ("dtype", table._dtype_info_label(self.dtype, spec)), + ("computed", self.is_computed), + ("nullable", self.null_value is not None or getattr(spec, "nullable", False)), + ] + + if self.is_list: + items.append(("storage", "list")) + elif self.is_varlen_scalar: + items.append(("storage", "variable-length scalar")) + elif self.is_dictionary: + items.append(("storage", "dictionary")) + items.append(("dictionary_size", len(raw.dictionary))) + else: + items.append(("storage", "ndarray" if isinstance(raw, blosc2.NDArray) else type(raw).__name__)) + + chunks = getattr(raw, "chunks", None) + blocks = getattr(raw, "blocks", None) + if chunks is not None: + items.append(("chunks", chunks)) + if blocks is not None: + items.append(("blocks", blocks)) + + nbytes = getattr(raw, "nbytes", None) + cbytes = getattr(raw, "cbytes", None) + cratio = getattr(raw, "cratio", None) + if nbytes is not None: + items.append(("nbytes", format_nbytes_info(nbytes))) + if cbytes is not None: + items.append(("cbytes", format_nbytes_info(cbytes))) + if cratio is not None: + items.append(("cratio", f"{cratio:.2f}")) + + urlpath = getattr(raw, "urlpath", None) + if urlpath is not None: + items.append(("urlpath", urlpath)) + cparams = getattr(raw, "cparams", None) + dparams = getattr(raw, "dparams", None) + if cparams is not None: + items.append(("cparams", cparams)) + if 
dparams is not None: + items.append(("dparams", dparams)) + return items + @property def ndim(self) -> int: """Number of logical dimensions.""" @@ -812,6 +928,11 @@ def _ensure_queryable(self) -> None: f"Column {self._col_name!r} is a vlstring/vlbytes column; " "lazy expressions and vectorized comparisons are not supported yet." ) + if self.is_dictionary: + raise NotImplementedError( + f"Column {self._col_name!r} is a dictionary column; " + "use == and isin() for dictionary column comparisons." + ) @staticmethod def _unwrap_operand(other): @@ -964,17 +1085,97 @@ def __le__(self, other): return self._raw_col <= self._coerce_timestamp_operand(other) def __eq__(self, other): + if self.is_dictionary: + return self._dictionary_eq(other) self._ensure_queryable() if self._is_nullable_bool and isinstance(other, (bool, np.bool_)): return self._raw_col == int(other) return self._raw_col == self._coerce_timestamp_operand(other) def __ne__(self, other): + if self.is_dictionary: + result = self._dictionary_eq(other) + if isinstance(result, np.ndarray): + return ~result + return ~np.asarray(result, dtype=bool) self._ensure_queryable() if self._is_nullable_bool and isinstance(other, (bool, np.bool_)): return self._raw_col == int(not other) return self._raw_col != self._coerce_timestamp_operand(other) + def _dictionary_eq(self, other): + """Return a physical-slot boolean predicate for dictionary equality. + + Regular fixed-width columns build predicates against their raw physical + arrays, whose length is the table slot capacity. Dictionary predicates + need to use the same coordinate system so they can be combined with + regular predicates before aggregate/view code intersects them with + ``_valid_rows``. 
+ """ + dc = self._raw_col # DictionaryColumn + spec = self._table._schema.columns_by_name[self._col_name].spec + if other is None: + target_code = spec.null_code + elif isinstance(other, str): + try: + target_code = dc.value_to_code(other) + except KeyError: + return blosc2.zeros(len(self._table._valid_rows), dtype=np.bool_) + else: + raise TypeError( + f"Dictionary column {self._col_name!r} can only be compared with str or None, " + f"got {type(other).__name__!r}." + ) + pred = dc.codes == np.int32(target_code) + valid = self._lazy_valid_rows() + if len(dc.codes) != len(self._table._valid_rows): + physical = blosc2.zeros(len(self._table._valid_rows), dtype=np.bool_) + physical[: len(dc.codes)] = pred + pred = physical + return pred & valid + + def isin(self, values) -> np.ndarray: + """Return a boolean array True where the live value is in *values*. + + For dictionary columns this performs efficient integer-code membership + testing (no decoding of all values). Values absent from the + dictionary are treated as not-present. + + For non-dictionary columns this decodes all live values and tests + membership in a set. + """ + if self.is_dictionary: + return self._dictionary_isin(values) + live_values = self[:] + test_set = set(values) + if isinstance(live_values, np.ndarray): + return np.array([v in test_set for v in live_values.tolist()], dtype=bool) + return np.array([v in test_set for v in live_values], dtype=bool) + + def _dictionary_isin(self, values) -> np.ndarray: + """Return a boolean array for in-membership tests against a dictionary column.""" + dc = self._raw_col # DictionaryColumn + spec = self._table._schema.columns_by_name[self._col_name].spec + valid = self._valid_rows + live_pos = np.where(valid[:])[0] + if len(live_pos) == 0: + return np.zeros(0, dtype=bool) + # Map requested values to codes, ignoring absent values. 
+ target_codes: set[int] = set() + for v in values: + if v is None: + target_codes.add(spec.null_code) + elif isinstance(v, str): + with contextlib.suppress(KeyError): + target_codes.add(dc.value_to_code(v)) + if not target_codes: + return np.zeros(len(live_pos), dtype=bool) + live_codes = np.asarray(dc.codes[live_pos], dtype=np.int32) + mask = np.zeros(len(live_codes), dtype=bool) + for code in target_codes: + mask |= live_codes == np.int32(code) + return mask + def __gt__(self, other): self._ensure_queryable() return self._raw_col > self._coerce_timestamp_operand(other) @@ -1179,8 +1380,11 @@ def is_null(self) -> np.ndarray: For varlen scalar columns (vlstring/vlbytes) nullability is represented as native ``None`` values, so this returns True wherever the value is - ``None``. + ``None``. For dictionary columns, returns True where the code equals + the null_code (``-1`` by default). """ + if self.is_dictionary: + return self._dictionary_eq(None) if self.is_varlen_scalar: return np.array([v is None for v in self], dtype=np.bool_) return self._null_mask_for(self[:]) @@ -1195,6 +1399,8 @@ def null_count(self) -> int: Returns ``0`` in O(1) if no ``null_value`` is configured for this column and the column is not a varlen scalar column. 
""" + if self.is_dictionary: + return int(self.is_null().sum()) if self.is_varlen_scalar: return sum(1 for v in self if v is None) if self.null_value is None: @@ -1285,7 +1491,9 @@ def _normalize_sum_where(self, where): return None if isinstance(where, str): self._table._guard_varlen_scalar_expression(where) - where = blosc2.lazyexpr(where, self._table._where_expression_operands()) + operands = self._table._where_expression_operands() + where, operands = self._table._rewrite_nested_expression(where, operands) + where = blosc2.lazyexpr(where, operands) if isinstance(where, np.ndarray) and where.dtype == np.bool_: where = blosc2.asarray(where) if isinstance(where, Column): @@ -1309,7 +1517,10 @@ def _lazy_nonnull_mask(self, where=None): if not isinstance(raw, (blosc2.NDArray, blosc2.LazyExpr)): return NotImplemented - all_rows_visible = self._mask is None and self._table._n_rows == len(self._table._valid_rows) + table_n_rows = self._table._known_n_rows() + all_rows_visible = ( + self._mask is None and table_n_rows is not None and table_n_rows == len(self._table._valid_rows) + ) mask = None if all_rows_visible else self._lazy_valid_rows() if where is not None: mask = where if mask is None else mask & where @@ -1340,7 +1551,7 @@ def _sum_lazy_fastpath(self, acc_dtype, where=None, *, jit=None, jit_backend=Non where is None and self._table.base is not None and total_rows - and self._table._n_rows / total_rows < 0.25 + and self._table.nrows / total_rows < 0.25 ): return NotImplemented @@ -1666,6 +1877,171 @@ def _fmt_bytes(n: int) -> str: # We use a plain dict so that nothing extra needs to be imported. +class _StructPathColumn: + """Virtual read-only column representing a struct prefix path. + + Values are reconstructed per row from descendant dotted leaf columns. 
+ """ + + def __init__(self, table: CTable, prefix: str, leaves: list[str]): + self._table = table + self._prefix = prefix + self._leaves = list(leaves) + + def _leaf_is_null_at_logical(self, leaf: str, idx: int) -> bool: + col = self._table[leaf] + v = col[idx] + nv = col.null_value + if nv is None: + return v is None + try: + return bool(col._null_mask_for(np.asarray([v]))[0]) + except Exception: + return v is None + + def _row_value_at_logical(self, idx: int): + # If every descendant leaf is null at this row, represent the struct as None. + if self._leaves and all(self._leaf_is_null_at_logical(leaf, idx) for leaf in self._leaves): + return None + prefix_parts = split_field_path(self._prefix) + result: dict[str, Any] = {} + for leaf in self._leaves: + parts = split_field_path(leaf) + rel_parts = parts[len(prefix_parts) :] + if not rel_parts: + continue + node = result + for part in rel_parts[:-1]: + child = node.get(part) + if not isinstance(child, dict): + child = {} + node[part] = child + node = child + node[rel_parts[-1]] = self._table._normalize_scalar_value(self._table[leaf][idx]) + return result + + def __getitem__(self, key): + if isinstance(key, int): + return self._row_value_at_logical(key) + if isinstance(key, slice): + start, stop, step = key.indices(self._table.nrows) + return [self._row_value_at_logical(i) for i in range(start, stop, step)] + if isinstance(key, (list, np.ndarray)): + if len(key) == 0: + return [] + if isinstance(key, np.ndarray) and key.dtype == np.bool_: + idxs = np.where(key)[0] + elif isinstance(key[0], (bool, np.bool_)): + idxs = [i for i, v in enumerate(key) if v] + else: + idxs = [int(i) for i in key] + return [self._row_value_at_logical(i) for i in idxs] + raise TypeError(f"Invalid index type: {type(key)}") + + def __iter__(self): + for i in range(self._table.nrows): + yield self._row_value_at_logical(i) + + +class _NestedColumnNamespace: + """Attribute proxy for dotted nested column paths. 
+ + Allows `t.trip.begin.lon` when the physical leaf column is named + `"trip.begin.lon"`. + """ + + def __init__(self, table: CTable, prefix: str): + self._table = table + self._prefix = prefix + + def __getattr__(self, name: str): + path = join_field_path((*split_field_path(self._prefix), name)) + if path in self._table._cols or path in self._table._computed_cols: + return Column(self._table, path) + path_parts = split_field_path(path) + for col_name in self._table.col_names: + parts = split_field_path(col_name) + if parts[: len(path_parts)] == path_parts and len(parts) > len(path_parts): + return _NestedColumnNamespace(self._table, path) + raise AttributeError(path) + + def __repr__(self) -> str: + return f"" + + +class _LazyColumnDict(dict): + """Dict-like column cache that opens persistent columns on first use. + + Persistent CTables can be wide, and opening every stored column eagerly is + expensive for workloads that touch only a small subset of columns, e.g. + ``blosc2.open(path).trip.km.sum()`` on a nested table. Keep the public and + internal ``_cols`` access pattern mostly unchanged while deferring each + ``storage.open_*_column()`` call until that column is actually requested. + + Methods that logically need all materialized columns, such as ``items()`` + and ``values()``, force-load the cache for compatibility with normal + ``dict`` usage. Name-oriented operations, such as ``keys()``, iteration, + ``len()``, and ``in``, operate from the schema column list without opening + the column payloads. 
+ """ + + def __init__(self, table: CTable, storage: TableStorage, col_names: list[str]): + super().__init__() + self._table = table + self._storage = storage + self._col_names = list(col_names) + self._available = set(col_names) + + def _load(self, name: str): + if name not in self._available: + raise KeyError(name) + if not dict.__contains__(self, name): + dict.__setitem__(self, name, self._table._open_column_from_storage(self._storage, name)) + return dict.__getitem__(self, name) + + def _load_all(self) -> None: + for name in self._col_names: + self._load(name) + + def __getitem__(self, name: str): + return self._load(name) + + def get(self, name: str, default=None): + return self._load(name) if name in self._available else default + + def __contains__(self, name: object) -> bool: + return name in self._available + + def __iter__(self): + return iter(self._col_names) + + def __len__(self) -> int: + return len(self._col_names) + + def keys(self): + return dict.fromkeys(self._col_names).keys() + + def items(self): + self._load_all() + return dict.items(self) + + def values(self): + self._load_all() + return dict.values(self) + + def __setitem__(self, name: str, value) -> None: + if name not in self._available: + self._available.add(name) + self._col_names.append(name) + dict.__setitem__(self, name, value) + + def __delitem__(self, name: str) -> None: + self._available.remove(name) + self._col_names.remove(name) + if dict.__contains__(self, name): + dict.__delitem__(self, name) + + class CTable(Generic[RowT]): """Columnar compressed table with typed columns and row-oriented access.""" @@ -1679,6 +2055,23 @@ class CTable(Generic[RowT]): #: :meth:`add_column` and :meth:`drop_column` are blocked on views. 
base: CTable | None + @property + def _n_rows(self) -> int: + """Number of live rows, computed lazily for reopened tables.""" + n_rows = getattr(self, "_n_rows_cached", None) + if n_rows is None: + n_rows = int(blosc2.count_nonzero(self._valid_rows)) + self._n_rows_cached = n_rows + return n_rows + + @_n_rows.setter + def _n_rows(self, value: int | None) -> None: + self._n_rows_cached = value + + def _known_n_rows(self) -> int | None: + """Return cached live-row count without triggering a scan.""" + return getattr(self, "_n_rows_cached", None) + def __init__( self, row_type: type[RowT], @@ -1742,17 +2135,11 @@ def __init__( ) self.col_names = [c["name"] for c in schema_dict["columns"]] self._valid_rows = storage.open_valid_rows() + self._cols = _LazyColumnDict(self, storage, self.col_names) for name in self.col_names: cc = self._schema.columns_by_name[name] - if self._is_list_column(cc): - col = storage.open_list_column(name) - elif self._is_varlen_scalar_column(cc): - col = storage.open_varlen_scalar_column(name, cc.spec) - else: - col = storage.open_column(name) - self._cols[name] = col self._col_widths[name] = max(len(name), cc.display_width) - self._n_rows = int(blosc2.count_nonzero(self._valid_rows)) + self._n_rows = None self._last_pos = None # resolve lazily on first write # ---- Restore computed/materialized column metadata (if any) ---- self._computed_cols = {} @@ -1823,6 +2210,10 @@ def _is_list_column(col: CompiledColumn) -> bool: def _is_varlen_scalar_column(col: CompiledColumn) -> bool: return isinstance(col.spec, (VLStringSpec, VLBytesSpec, StructSpec, ObjectSpec)) + @staticmethod + def _is_dictionary_column(col: CompiledColumn) -> bool: + return isinstance(col.spec, DictionarySpec) + @staticmethod def _is_list_spec(spec: SchemaSpec) -> bool: return isinstance(spec, ListSpec) @@ -1886,7 +2277,10 @@ def _resolve_nullable_specs( for col in schema.columns: spec = col.spec if ( - isinstance(spec, (ListSpec, VLStringSpec, VLBytesSpec, StructSpec, 
ObjectSpec)) + isinstance( + spec, + (ListSpec, VLStringSpec, VLBytesSpec, StructSpec, ObjectSpec, DictionarySpec), + ) or getattr(spec, "null_value", None) is not None ): continue @@ -1912,7 +2306,11 @@ def _resolve_nullable_specs( def _flush_varlen_columns(self) -> None: for col in self._schema.columns: - if self._is_list_column(col) or self._is_varlen_scalar_column(col): + if ( + self._is_list_column(col) + or self._is_varlen_scalar_column(col) + or self._is_dictionary_column(col) + ): self._cols[col.name].flush() def _init_columns( @@ -1939,6 +2337,14 @@ def _init_columns( dparams=col_storage.get("dparams"), ) continue + if self._is_dictionary_column(col): + self._cols[col.name] = storage.create_dictionary_column( + col.name, + spec=col.spec, + cparams=col_storage.get("cparams"), + dparams=col_storage.get("dparams"), + ) + continue # Recompute chunks/blocks using the actual dtype so that wide # string columns (e.g. U183642) don't produce multi-GB chunks. chunks = col_storage["chunks"] @@ -1978,22 +2384,47 @@ def _resolve_column_storage( result["dparams"] = dparams return result + @staticmethod + def _flatten_nested_dict(d: dict, prefix: str = "") -> dict: + """Recursively flatten a nested dict into a dotted-key flat dict. + + Works for both single-row dicts ``{field: value}`` and column-batch + dicts ``{field: array}``. Leaves non-dict values unchanged. + + Example:: + + {"trip": {"begin": {"lon": 1.0}}} -> {"trip.begin.lon": 1.0} + """ + result = {} + for k, v in d.items(): + full_key = join_field_path((*split_field_path(prefix), k)) if prefix else join_field_path((k,)) + if isinstance(v, dict): + result.update(CTable._flatten_nested_dict(v, full_key)) + else: + result[full_key] = v + return result + def _normalize_row_input(self, data: Any) -> dict[str, Any]: """Normalize a row input to a ``{col_name: value}`` dict. 
Accepted shapes: - list / tuple → positional, zipped with stored column names (computed columns skipped) - - dict → used as-is - - dataclass → ``dataclasses.asdict`` + - dict → used as-is (nested dicts are flattened to dotted keys) + - dataclass → ``dataclasses.asdict`` (nested fields flattened) - np.void / structured scalar → field-name access """ stored = self._append_input_col_names if isinstance(data, dict): + if any(isinstance(v, dict) for v in data.values()): + return self._flatten_nested_dict(data) return data if isinstance(data, (list, tuple)): return dict(zip(stored, data, strict=False)) if dataclasses.is_dataclass(data) and not isinstance(data, type): - return dataclasses.asdict(data) + d = dataclasses.asdict(data) + if any(isinstance(v, dict) for v in d.values()): + return self._flatten_nested_dict(d) + return d if isinstance(data, (np.void, np.record)): return {name: data[name] for name in stored} # Fallback: try positional indexing @@ -2009,6 +2440,9 @@ def _coerce_row_to_storage(self, row: dict[str, Any]) -> dict[str, Any]: elif self._is_varlen_scalar_column(col): # Coercion is handled inside _ScalarVarLenArray.append. result[col.name] = val + elif self._is_dictionary_column(col): + # Pass str/None through; DictionaryColumn.__setitem__ encodes. 
+ result[col.name] = val elif isinstance(col.spec, timestamp): if val is None: result[col.name] = col.spec.null_value @@ -2022,6 +2456,17 @@ def _coerce_row_to_storage(self, row: dict[str, Any]) -> dict[str, Any]: result[col.name] = np.array(val, dtype=col.dtype).item() return result + def _open_column_from_storage(self, storage: TableStorage, name: str): + """Open one stored column from *storage*.""" + cc = self._schema.columns_by_name[name] + if self._is_list_column(cc): + return storage.open_list_column(name) + if self._is_varlen_scalar_column(cc): + return storage.open_varlen_scalar_column(name, cc.spec) + if self._is_dictionary_column(cc): + return storage.open_dictionary_column(name, cc.spec) + return storage.open_column(name) + def _resolve_last_pos(self) -> int: """Return the physical index of the next write slot. @@ -2067,6 +2512,9 @@ def _grow(self) -> None: cc = self._schema.columns_by_name[name] if self._is_list_column(cc) or self._is_varlen_scalar_column(cc): continue + if self._is_dictionary_column(cc): + col_arr.resize((c * 2,)) + continue col_arr.resize((c * 2,)) self._valid_rows.resize((c * 2,)) @@ -2083,10 +2531,14 @@ def _display_positions(self, head_tail: int = 10): return all_pos, np.array([], dtype=all_pos.dtype), 0 return all_pos[:head_tail], all_pos[-head_tail:], hidden - def _display_widths(self) -> dict[str, int]: + def _display_widths(self, col_names: list[str] | None = None) -> dict[str, int]: widths: dict[str, int] = {} - single_col = len(self.col_names) == 1 - for name in self.col_names: + col_names = self.col_names if col_names is None else col_names + single_col = len(col_names) == 1 + for name in col_names: + if name == "...": + widths[name] = 3 + continue spec = self._schema.columns_by_name.get(name) dtype_label = self._dtype_info_label(self._col_dtype(name), spec.spec if spec else None) widths[name] = max(self._col_widths[name], len(dtype_label)) @@ -2094,25 +2546,80 @@ def _display_widths(self) -> dict[str, int]: widths[name] = 
max(widths[name], 80) return widths + def _display_columns(self) -> tuple[list[str], int]: + """Return terminal-width-friendly display columns and hidden count.""" + col_names = list(self.col_names) + widths = self._display_widths(col_names) + widths["..."] = 3 + total_width = sum(widths[n] + 2 for n in col_names) + 2 * max(0, len(col_names) - 1) + term_width = shutil.get_terminal_size((120, 20)).columns + if total_width <= term_width or len(col_names) <= 2: + return col_names, 0 + + selected: list[str] = [] + left = 0 + right = len(col_names) - 1 + used = 0 + + def extra_width(name: str, n_existing: int) -> int: + return widths[name] + 2 + (2 if n_existing else 0) + + # Account for an ellipsis column between left and right blocks. + used += widths["..."] + 2 + while left <= right: + left_name = col_names[left] + need = extra_width(left_name, len(selected) + 1) + if used + need > term_width: + break + selected.append(left_name) + used += need + left += 1 + if left > right: + break + + right_name = col_names[right] + need = extra_width(right_name, len(selected) + 1) + if used + need > term_width: + break + selected.append(right_name) + used += need + right -= 1 + + left_cols = [n for n in col_names if n in selected and col_names.index(n) < left] + right_cols = [n for n in col_names if n in selected and col_names.index(n) > right] + display_cols = left_cols + ["..."] + right_cols + hidden = len(col_names) - len(left_cols) - len(right_cols) + return display_cols, hidden + @staticmethod def _format_cell(value, width: int) -> str: - s = str(value) + if isinstance(value, np.datetime64): + s = str(value).replace("T", " ") + if s.endswith(".000"): + s = s[:-4] + else: + s = str(value) if len(s) > width: s = s[: width - 1] + "…" return f" {s:<{width}} " - def _format_display_row(self, values: dict, widths: dict[str, int]) -> str: - return " ".join(self._format_cell(values[n], widths[n]) for n in self.col_names) + def _format_display_row(self, values: dict, widths: dict[str, 
int], col_names: list[str]) -> str: + return " ".join(self._format_cell(values[n], widths[n]) for n in col_names) - def _rows_to_dicts(self, positions) -> list[dict]: + def _rows_to_dicts(self, positions, col_names: list[str] | None = None) -> list[dict]: if len(positions) == 0: return [] - col_data = {n: self._fetch_col_at_positions(n, positions) for n in self.col_names} + col_names = self.col_names if col_names is None else col_names + real_cols = [n for n in col_names if n != "..."] + col_data = {n: self._fetch_col_at_positions(n, positions) for n in real_cols} rows = [] for i in range(len(positions)): row = {} - for n in self.col_names: - row[n] = self._normalize_scalar_value(col_data[n][i]) + for n in col_names: + # Keep NumPy scalar types for display so their compact string + # formatting is preserved (notably float32, e.g. 224.97 + # instead of Python float's 224.97000122070312). + row[n] = "..." if n == "..." else col_data[n][i] rows.append(row) return rows @@ -2121,31 +2628,44 @@ def __str__(self) -> str: nrows = self._n_rows ncols = len(self.col_names) head_pos, tail_pos, hidden = self._display_positions() - widths = self._display_widths() - sep = " ".join("─" * (w + 2) for w in widths.values()) + display_cols, hidden_cols = self._display_columns() + widths = self._display_widths(display_cols) + sep = " ".join("─" * (widths[n] + 2) for n in display_cols) + + dtype_row = {} + for n in display_cols: + if n == "...": + dtype_row[n] = "..." 
+ else: + dtype_row[n] = self._dtype_info_label( + self._col_dtype(n), + self._schema.columns_by_name[n].spec if n in self._schema.columns_by_name else None, + ) lines = [ - self._format_display_row({n: n for n in self.col_names}, widths), - self._format_display_row( - { - n: self._dtype_info_label( - self._col_dtype(n), - self._schema.columns_by_name[n].spec if n in self._schema.columns_by_name else None, - ) - for n in self.col_names - }, - widths, - ), + self._format_display_row({n: n for n in display_cols}, widths, display_cols), + self._format_display_row(dtype_row, widths, display_cols), sep, ] - lines.extend(self._format_display_row(row, widths) for row in self._rows_to_dicts(head_pos)) + lines.extend( + self._format_display_row(row, widths, display_cols) + for row in self._rows_to_dicts(head_pos, display_cols) + ) if hidden > 0: - lines.append(self._format_display_row(dict.fromkeys(self.col_names, "..."), widths)) - lines.extend(self._format_display_row(row, widths) for row in self._rows_to_dicts(tail_pos)) + lines.append(self._format_display_row(dict.fromkeys(display_cols, "..."), widths, display_cols)) + lines.extend( + self._format_display_row(row, widths, display_cols) + for row in self._rows_to_dicts(tail_pos, display_cols) + ) lines.append(sep) footer = f"{nrows:,} rows × {ncols} columns" + notes = [] if hidden > 0: - footer += f" ({hidden:,} rows hidden)" + notes.append(f"{hidden:,} rows hidden") + if hidden_cols > 0: + notes.append(f"{hidden_cols:,} columns hidden") + if notes: + footer += f" ({', '.join(notes)})" lines.append(footer) return "\n".join(lines) @@ -2170,6 +2690,17 @@ def _row_namedtuple_type(self): self._row_namedtuple_type_cache_cols = visible return self._row_namedtuple_type_cache + def _row_namedtuple_type_for_fields(self, fields: tuple[str, ...]): + cache = getattr(self, "_row_namedtuple_type_cache_by_fields", None) + if cache is None: + cache = {} + self._row_namedtuple_type_cache_by_fields = cache + row_type = cache.get(fields) + 
if row_type is None: + row_type = _make_namedtuple_row_type(fields) + cache[fields] = row_type + return row_type + @staticmethod def _normalize_scalar_value(value): if isinstance(value, np.generic): @@ -2195,8 +2726,32 @@ def _materialize_row(self, index: int): if not (0 <= index < n_rows): raise IndexError(f"row index {index} is out of bounds for table with {n_rows} rows") pos = _find_physical_index(self._valid_rows, index) - row_type = self._row_namedtuple_type() - return row_type(*(self._physical_row_value(name, int(pos)) for name in self.col_names)) + + nested_meta = self._schema.metadata.get("nested") if self._schema.metadata else None + reconstruct = isinstance(nested_meta, dict) and bool(nested_meta.get("reconstruct_rows", False)) + if not reconstruct: + row_type = self._row_namedtuple_type() + return row_type(*(self._physical_row_value(name, int(pos)) for name in self.col_names)) + + row_dict: dict[str, Any] = {} + for name in self.col_names: + value = self._physical_row_value(name, int(pos)) + parts = split_field_path(name) + if len(parts) <= 1: + row_dict[name] = value + continue + node = row_dict + for part in parts[:-1]: + child = node.get(part) + if not isinstance(child, dict): + child = {} + node[part] = child + node = child + node[parts[-1]] = value + + fields = tuple(row_dict.keys()) + row_type = self._row_namedtuple_type_for_fields(fields) + return row_type(*(row_dict[f] for f in fields)) def iter_sorted( self, @@ -2275,6 +2830,12 @@ def iter_sorted( # Open existing table (classmethod) # ------------------------------------------------------------------ + @classmethod + def _open_from_existing_filestore(cls, urlpath: str, *, mode: str, store: blosc2.TreeStore) -> CTable: + """Open a root CTable reusing an already-opened TreeStore.""" + storage = FileTableStorage(urlpath, mode, store=store) + return cls._open_from_storage(storage) + @classmethod def open(cls, urlpath: str, *, mode: str = "r") -> CTable: """Open a persistent CTable from *urlpath*. 
@@ -2463,6 +3024,23 @@ def _save_to_storage(self, storage: TableStorage) -> None: disk_col.extend(self._cols[name][int(pos)] for pos in live_pos) disk_col.flush() continue + if self._is_dictionary_column(col): + src_dc = self._cols[name] + disk_dc = storage.create_dictionary_column( + name, + spec=col.spec, + cparams=col.config.cparams if col.config.cparams is not None else self._table_cparams, + dparams=col.config.dparams if col.config.dparams is not None else self._table_dparams, + ) + # Copy dictionary values first + for v in src_dc.dictionary: + disk_dc.encode(v) + disk_dc.flush() + # Copy live codes + if n_live > 0: + raw_codes = np.asarray(src_dc.codes[live_pos], dtype=np.int32) + disk_dc.codes[:n_live] = raw_codes + continue dtype_chunks, dtype_blocks = compute_chunks_blocks((capacity,), dtype=col.dtype) col_storage = self._resolve_column_storage(col, dtype_chunks, dtype_blocks) disk_col = storage.create_column( @@ -2551,17 +3129,12 @@ def _open_from_storage(cls, storage: TableStorage) -> CTable: obj.base = None obj._valid_rows = storage.open_valid_rows() + obj._cols = _LazyColumnDict(obj, storage, col_names) for name in col_names: cc = schema.columns_by_name[name] - if obj._is_list_column(cc): - obj._cols[name] = storage.open_list_column(name) - elif obj._is_varlen_scalar_column(cc): - obj._cols[name] = storage.open_varlen_scalar_column(name, cc.spec) - else: - obj._cols[name] = storage.open_column(name) obj._col_widths[name] = max(len(name), cc.display_width) - obj._n_rows = int(blosc2.count_nonzero(obj._valid_rows)) + obj._n_rows = None obj._last_pos = None obj._computed_cols = {} obj._materialized_cols = {} @@ -2632,6 +3205,8 @@ def load(cls, urlpath: str) -> CTable: disk_cols[col.name] = file_storage.open_list_column(col.name) elif cls._is_varlen_scalar_column(col): disk_cols[col.name] = file_storage.open_varlen_scalar_column(col.name, col.spec) + elif cls._is_dictionary_column(col): + disk_cols[col.name] = file_storage.open_dictionary_column(col.name, 
col.spec) else: disk_cols[col.name] = file_storage.open_column(col.name) phys_size = len(disk_valid) @@ -2664,6 +3239,17 @@ def load(cls, urlpath: str) -> CTable: mem_col.flush() mem_cols[name] = mem_col continue + if cls._is_dictionary_column(col): + mem_col = mem_storage.create_dictionary_column(name, spec=col.spec) + disk_dc = disk_cols[name] + # Copy dictionary values + for v in disk_dc.dictionary: + mem_col.encode(v) + # Copy codes + if phys_size > 0: + mem_col.codes[:phys_size] = disk_dc.codes[:phys_size] + mem_cols[name] = mem_col + continue col_chunks, col_blocks = compute_chunks_blocks((capacity,), dtype=col.dtype) mem_col = mem_storage.create_column( name, @@ -2830,17 +3416,27 @@ def select(self, cols: list[str]) -> CTable: Parameters ---------- cols: - Ordered list of column names to keep. + Ordered list of column names to keep. For tables with **nested + (dotted) column names**, a struct-prefix name automatically expands + to all descendant leaves:: + + t.select(["trip.begin"]) # expands to trip.begin.lon, trip.begin.lat + t.select(["trip"]) # expands to all trip.* leaves Raises ------ KeyError - If any name in *cols* is not a column of this table. + If any name in *cols* is not a column of this table (and does not + match any struct prefix). ValueError If *cols* is empty. """ if not cols: raise ValueError("select() requires at least one column name.") + expanded_cols = [] + for name in cols: + expanded_cols.extend(self._expand_logical_column_selector(name)) + cols = expanded_cols for name in cols: if name not in self._cols and name not in self._computed_cols: raise KeyError(f"No column named {name!r}. 
Available: {self.col_names}") @@ -3063,17 +3659,44 @@ def _resolve_arrow_columns(self, columns, include_computed: bool = True) -> list names = list(self.col_names) if not include_computed: names = [name for name in names if name not in self._computed_cols] + + # If top-level struct aliases are present in schema metadata (virtual + # entries not physically stored), prefer exporting them instead of + # their descendant dotted leaves. + virtual_structs = [ + n + for n, cc in self._schema.columns_by_name.items() + if n not in self.col_names and isinstance(cc.spec, StructSpec) + ] + for alias in sorted(virtual_structs, key=len, reverse=True): + alias_parts = split_field_path(alias) + children = [ + n + for n in names + if split_field_path(n)[: len(alias_parts)] == alias_parts + and len(split_field_path(n)) > len(alias_parts) + ] + if not children: + continue + first = min(names.index(c) for c in children) + child_set = set(children) + names = [n for n in names if n not in child_set] + names.insert(first, alias) else: - names = list(columns) + names = [] + for name in columns: + names.extend(self._expand_logical_column_selector(name)) if len(set(names)) != len(names): raise ValueError("columns must be unique") for name in names: - if name not in self.col_names: + if name not in self.col_names and name not in self._schema.columns_by_name: raise KeyError(f"No column named {name!r}. 
Available: {self.col_names}") return names @staticmethod def _pa_type_from_spec(pa, spec): + if isinstance(spec, DictionarySpec): + return pa.dictionary(pa.int32(), pa.string(), ordered=spec.ordered) if isinstance(spec, VLStringSpec): return pa.string() if isinstance(spec, VLBytesSpec): @@ -3102,17 +3725,35 @@ def _pa_type_from_spec(pa, spec): return pa.large_binary() return pa.from_numpy_dtype(dtype) + def _export_arrow_names(self, names: list[str]) -> list[str]: + nested = self._schema.metadata.get("nested") if self._schema.metadata else None + exported = list(names) + if isinstance(nested, dict): + root_meta = nested.get("root") + if isinstance(root_meta, dict): + physical = root_meta.get("physical") + if isinstance(physical, str) and physical: + exported = ["" if n == physical else n for n in exported] + for i, n in enumerate(names): + cc = self._schema.columns_by_name.get(n) + if n not in self.col_names and cc is not None and isinstance(cc.spec, StructSpec): + parts = split_field_path(n) + if len(parts) == 1: + exported[i] = parts[0] + return exported + def _arrow_schema_for_columns(self, columns=None, *, include_computed: bool = True): pa = self._require_pyarrow("to_arrow()/to_parquet()") names = self._resolve_arrow_columns(columns, include_computed=include_computed) + arrow_names = self._export_arrow_names(names) fields = [] - for name in names: + for name, arrow_name in zip(names, arrow_names, strict=True): cc = self._schema.columns_by_name.get(name) if cc is not None: pa_type = self._pa_type_from_spec(pa, cc.spec) else: pa_type = pa.from_numpy_dtype(np.asarray(self[name][:0]).dtype) - fields.append(pa.field(name, pa_type)) + fields.append(pa.field(arrow_name, pa_type)) return pa.schema(fields) def iter_arrow_batches( @@ -3127,11 +3768,17 @@ def iter_arrow_batches( self._validate_arrow_batch_size(batch_size) self._flush_varlen_columns() names = self._resolve_arrow_columns(columns, include_computed=include_computed) + arrow_names = 
self._export_arrow_names(names) for start in range(0, self._n_rows, batch_size): stop = min(start + batch_size, self._n_rows) arrays = [] for name in names: + cc = self._schema.columns_by_name.get(name) + if name not in self.col_names and cc is not None and isinstance(cc.spec, StructSpec): + values = self[name][start:stop] + arrays.append(pa.array(values, type=self._pa_type_from_spec(pa, cc.spec))) + continue col = self[name] if col.is_list: spec = self._schema.columns_by_name[name].spec @@ -3142,6 +3789,34 @@ def iter_arrow_batches( values = col[start:stop] # list of str/bytes/None arrays.append(pa.array(values, type=self._pa_type_from_spec(pa, spec))) continue + if col.is_dictionary: + dc = self._cols[name] # DictionaryColumn + spec = self._schema.columns_by_name[name].spec + # Get physical positions for live rows in [start, stop) + valid = self._valid_rows + real_pos = blosc2.where(valid, _arange(len(valid))).compute() + batch_real_pos = real_pos[start:stop] + if len(batch_real_pos) == 0: + pa_dict = pa.array(dc.dictionary, type=pa.string()) + pa_indices = pa.array([], type=pa.int32()) + arrays.append( + pa.DictionaryArray.from_arrays(pa_indices, pa_dict, ordered=spec.ordered) + ) + else: + raw_codes = np.asarray(dc.codes[batch_real_pos], dtype=np.int32) + null_mask = raw_codes == np.int32(spec.null_code) + safe_codes = raw_codes.copy() + safe_codes[null_mask] = 0 + pa_dict = pa.array(dc.dictionary, type=pa.string()) + pa_indices = pa.array( + safe_codes, + type=pa.int32(), + mask=null_mask if null_mask.any() else None, + ) + arrays.append( + pa.DictionaryArray.from_arrays(pa_indices, pa_dict, ordered=spec.ordered) + ) + continue arr = np.asarray(col[start:stop]) nv = col.null_value null_mask = col._null_mask_for(arr) if nv is not None else None @@ -3175,7 +3850,7 @@ def iter_arrow_batches( ) else: arrays.append(pa.array(arr, mask=null_mask if has_nulls else None)) - yield pa.RecordBatch.from_arrays(arrays, names=names) + yield pa.RecordBatch.from_arrays(arrays, 
names=arrow_names) def to_arrow(self): """Convert all live rows to a :class:`pyarrow.Table`.""" @@ -3191,6 +3866,9 @@ def _auto_null_sentinel(pa, pa_type, *, null_policy: NullPolicy): @staticmethod def _arrow_type_needs_object_fallback(pa, pa_type) -> bool: """True when *pa_type* has no typed CTable mapping.""" + if pa.types.is_dictionary(pa_type): + vt = pa_type.value_type + return vt not in (pa.string(), pa.large_string(), pa.utf8(), pa.large_utf8()) if pa_type in ( pa.int8(), pa.int16(), @@ -3230,6 +3908,47 @@ def _arrow_type_to_spec( # noqa: C901 ): import blosc2.schema as b2s + # Handle Arrow dictionary types (dict-encoded strings) + if pa.types.is_dictionary(pa_type): + vt = pa_type.value_type + if vt in (pa.string(), pa.large_string(), pa.utf8(), pa.large_utf8()): + index_type = pa_type.index_type + # Accept signed and unsigned integer index types; validate fit in int32. + if not (pa.types.is_integer(index_type) or pa.types.is_unsigned_integer(index_type)): + raise TypeError( + f"Dictionary column has unsupported index type {index_type!r}; " + "expected an integer type." + ) + if arrow_col is not None: + # Validate all indices fit in signed int32. + if pa.types.is_unsigned_integer(index_type): + max_idx = arrow_col.combine_chunks().indices.to_pandas().max(skipna=True) + if max_idx is not None and max_idx > np.iinfo(np.int32).max: + raise ValueError( + f"Arrow dictionary column has unsigned indices exceeding int32.max " + f"(max={max_idx})." + ) + combined = ( + arrow_col.combine_chunks() if hasattr(arrow_col, "combine_chunks") else arrow_col + ) + n_cats = len(combined.dictionary) + if n_cats > np.iinfo(np.int32).max: + raise OverflowError( + f"Arrow dictionary has {n_cats} categories, exceeding int32 capacity." 
+ ) + return b2s.dictionary( + index_type=b2s.int32(), + value_type=b2s.vlstring(), + ordered=bool(pa_type.ordered), + nullable=nullable, + ) + if object_fallback: + return b2s.object(nullable=nullable) + raise TypeError( + f"No blosc2 spec for Arrow dictionary type {pa_type!r} with " + f"value type {pa_type.value_type!r}. Only string dictionary values are supported in v1." + ) + mapping = [ (pa.int8(), b2s.int8), (pa.int16(), b2s.int16), @@ -3258,10 +3977,9 @@ def _arrow_type_to_spec( # noqa: C901 if pa.types.is_list(pa_type) or pa.types.is_large_list(pa_type): if arrow_col is not None: - py_values = arrow_col.to_pylist() - flat_values = [item for cell in py_values if cell is not None for item in cell] - item_arrow_col = pa.array(flat_values, type=pa_type.value_type) - nullable = nullable or any(v is None for v in py_values) + combined = arrow_col.combine_chunks() if hasattr(arrow_col, "combine_chunks") else arrow_col + item_arrow_col = combined.values + nullable = nullable or combined.null_count > 0 else: item_arrow_col = None nullable = True @@ -3354,10 +4072,12 @@ def _compiled_columns_from_arrow( arrow_col = table_for_inference.column(name) if table_for_inference is not None else None field_is_list = pa.types.is_list(field.type) or pa.types.is_large_list(field.type) field_is_struct = pa.types.is_struct(field.type) + field_is_dictionary = pa.types.is_dictionary(field.type) column_string_max_length = cls._string_max_length_for_column(string_max_length, name) field_is_varlen_scalar = ( not field_is_list and not field_is_struct + and not field_is_dictionary and column_string_max_length is None and ( pa.types.is_string(field.type) @@ -3372,7 +4092,9 @@ def _compiled_columns_from_arrow( field_is_object_fallback = object_fallback and field_needs_object_fallback null_value = None has_null_value_override = name in column_null_values - if has_null_value_override and (field_is_list or field_is_struct or field_is_object_fallback): + if has_null_value_override and ( + 
field_is_list or field_is_struct or field_is_dictionary or field_is_object_fallback + ): raise TypeError(f"column_null_values only supports scalar columns; {name!r} is not scalar") if has_null_value_override and field_is_varlen_scalar: raise TypeError( @@ -3385,7 +4107,11 @@ def _compiled_columns_from_arrow( auto_null_sentinels and field.nullable and not ( - field_is_list or field_is_struct or field_is_varlen_scalar or field_is_object_fallback + field_is_list + or field_is_struct + or field_is_dictionary + or field_is_varlen_scalar + or field_is_object_fallback ) ): null_value = cls._auto_null_sentinel(pa, field.type, null_policy=null_policy) @@ -3393,7 +4119,11 @@ def _compiled_columns_from_arrow( arrow_col is not None and arrow_col.null_count and not ( - field_is_list or field_is_struct or field_is_varlen_scalar or field_is_object_fallback + field_is_list + or field_is_struct + or field_is_dictionary + or field_is_varlen_scalar + or field_is_object_fallback ) and null_value is None ): @@ -3411,7 +4141,11 @@ def _compiled_columns_from_arrow( object_fallback=object_fallback, ) if null_value is not None and not ( - field_is_list or field_is_struct or field_is_varlen_scalar or field_is_object_fallback + field_is_list + or field_is_struct + or field_is_dictionary + or field_is_varlen_scalar + or field_is_object_fallback ): cls._validate_null_value_for_spec(name, spec, null_value) columns.append(cls._compiled_column_from_spec(name, spec)) @@ -3467,7 +4201,7 @@ def _create_arrow_import_columns( new_valid = storage.create_valid_rows( shape=(capacity,), chunks=default_chunks, blocks=default_blocks ) - new_cols: dict[str, blosc2.NDArray | ListArray | _ScalarVarLenArray] = {} + new_cols: dict[str, blosc2.NDArray | ListArray | _ScalarVarLenArray | DictionaryColumn] = {} for col in columns: if cls._is_list_column(col): new_cols[col.name] = storage.create_list_column( @@ -3477,6 +4211,10 @@ def _create_arrow_import_columns( new_cols[col.name] = 
storage.create_varlen_scalar_column( col.name, spec=col.spec, cparams=cparams, dparams=dparams ) + elif cls._is_dictionary_column(col): + new_cols[col.name] = storage.create_dictionary_column( + col.name, spec=col.spec, cparams=cparams, dparams=dparams + ) else: chunks, blocks = default_chunks, default_blocks if col.dtype is not None: @@ -3517,34 +4255,117 @@ def _new_arrow_import_ctable( obj._last_pos = 0 return obj + @staticmethod + def _timestamp_normalizer_for_spec(spec: SchemaSpec): # noqa: C901 + """Build a trusted Arrow-import normalizer for timestamp leaves. + + Arrow already validates list/struct values during import, so list columns + normally skip Python-level coercion. The exception is nested timestamps: + ``to_pylist()`` yields ``datetime``/``numpy.datetime64`` objects, while + msgpack-backed ListArray storage expects integer epoch offsets. Return a + small normalizer that descends only into branches containing timestamps, + or ``None`` when no normalization is needed. + """ + if isinstance(spec, timestamp): + + def normalize_timestamp(value, unit=spec.unit): + if value is None: + return None + if isinstance(value, (int, np.integer)): + return int(value) + return np.datetime64(value).astype(f"datetime64[{unit}]").astype(np.int64).item() + + return normalize_timestamp + + if isinstance(spec, ListSpec): + item_normalizer = CTable._timestamp_normalizer_for_spec(spec.item_spec) + if item_normalizer is None: + return None + + def normalize_list(value, item_normalizer=item_normalizer): + if value is None: + return None + for i, item in enumerate(value): + value[i] = item_normalizer(item) + return value + + return normalize_list + + if isinstance(spec, StructSpec): + field_normalizers = { + name: normalizer + for name, child in spec.fields.items() + if (normalizer := CTable._timestamp_normalizer_for_spec(child)) is not None + } + if not field_normalizers: + return None + + def normalize_struct(value, field_normalizers=field_normalizers): + if value is None: + 
return None + for name, normalizer in field_normalizers.items(): + if name in value: + value[name] = normalizer(value[name]) + return value + + return normalize_struct + + return None + @classmethod def _write_arrow_batches(cls, obj, batches, columns, new_cols, new_valid) -> None: pos = 0 + list_normalizers = { + col.name: cls._timestamp_normalizer_for_spec(col.spec) + for col in columns + if cls._is_list_column(col) + } for batch in batches: end = pos + len(batch) while end > len(new_valid): obj._grow() new_valid = obj._valid_rows - pos = cls._write_arrow_batch(batch, columns, new_cols, new_valid, pos) + pos = cls._write_arrow_batch(batch, columns, new_cols, new_valid, pos, list_normalizers) for col in columns: - if cls._is_list_column(col) or cls._is_varlen_scalar_column(col): + if ( + cls._is_list_column(col) + or cls._is_varlen_scalar_column(col) + or cls._is_dictionary_column(col) + ): new_cols[col.name].flush() obj._n_rows = pos obj._last_pos = pos @classmethod - def _write_arrow_batch(cls, batch, columns, new_cols, new_valid, pos: int) -> int: + def _write_arrow_batch(cls, batch, columns, new_cols, new_valid, pos: int, list_normalizers) -> int: m = len(batch) if m == 0: return pos for col in columns: arrow_col = batch.column(batch.schema.get_field_index(col.name)) if cls._is_list_column(col): + if getattr(col.spec, "serializer", None) == "arrow": + new_cols[col.name].extend_arrow(arrow_col) + continue # Trusted Arrow-import fast path: schema has already been inferred, - # so avoid Python-level per-item coercion/validation here. - new_cols[col.name].extend(arrow_col.to_pylist(), validate=False) + # so avoid Python-level per-item coercion. If nested timestamps + # are present, normalize only those leaves before storing. 
+ values = arrow_col.to_pylist() + normalizer = list_normalizers[col.name] + if normalizer is not None: + values = [normalizer(value) for value in values] + new_cols[col.name].extend(values, validate=False) elif cls._is_varlen_scalar_column(col): new_cols[col.name].extend(arrow_col.to_pylist()) + elif cls._is_dictionary_column(col): + import pyarrow as _pa + + if _pa.types.is_dictionary(arrow_col.type): + # Arrow dictionary array: use unification algorithm. + new_cols[col.name].extend_from_arrow(_pa, arrow_col, pos, m, ordered=col.spec.ordered) + else: + # Plain string array: encode values into the dictionary. + new_cols[col.name][pos : pos + m] = arrow_col.to_pylist() else: new_cols[col.name][pos : pos + m] = cls._arrow_column_to_numpy(arrow_col, col) new_valid[pos : pos + m] = True @@ -3596,8 +4417,157 @@ def _arrow_schema_metadata(schema) -> dict[str, Any]: arrow_meta["schema_ipc_base64"] = schema_ipc_base64 return {"arrow": arrow_meta} + @staticmethod + def _nested_metadata_from_column_names( + column_names: list[str], *, empty_root_physical: str | None = None + ) -> dict: + logical_to_physical = {} + physical_to_storage = {} + for name in column_names: + logical_to_physical[name] = name + physical_to_storage[name] = f"_cols/{_column_name_to_relpath(name)}" + nested = { + "version": 1, + "logical_root": "", + "logical_to_physical": logical_to_physical, + "physical_to_storage": physical_to_storage, + } + if empty_root_physical: + logical_to_physical[""] = empty_root_physical + nested["root"] = {"logical": "", "physical": empty_root_physical} + return nested + + # ------------------------------------------------------------------ + # Unnamed-root list> detection and flattening helpers + # ------------------------------------------------------------------ + + @staticmethod + def _detect_unnamed_root_list_struct(pa, schema) -> bool: + """Return True iff *schema* qualifies for unnamed-root list> flattening. 
+ + Conditions (all must hold): + * exactly one top-level field; + * field name is ``""`` (the canonical unnamed Arrow root); + * field type is ``list>`` or ``large_list>``. + """ + if len(schema) != 1: + return False + field = schema[0] + if field.name != "": + return False + t = field.type + if not (pa.types.is_list(t) or pa.types.is_large_list(t)): + return False + return pa.types.is_struct(t.value_type) + + @staticmethod + def _inner_schema_for_unnamed_root(pa, schema): + """Extract the inner struct schema from a single unnamed root list> schema. + + Returns a new Arrow schema whose top-level fields are the struct fields + of the list value type. The nullable flag of the original unnamed field + is not propagated — individual struct child nullability applies. + """ + field = schema[0] # the unnamed "" field + struct_type = field.type.value_type # struct type inside the list + return pa.schema(list(struct_type)) + + @staticmethod + def _flatten_root_list_struct_batches(pa, inner_schema, batches, max_rows: int | None = None): + """Yield flattened :class:`pyarrow.RecordBatch` objects from an unnamed root stream. + + For each incoming batch (which has a single list> column), + flatten the outer list using ``ListArray.flatten()`` — which skips null + outer list rows — and convert the resulting struct array into a + :class:`~pyarrow.RecordBatch` whose columns correspond to the struct fields. + + Parameters + ---------- + pa: + The ``pyarrow`` module. + inner_schema: + Arrow schema for the inner struct (output of + :meth:`_inner_schema_for_unnamed_root`). + batches: + Iterable of incoming :class:`~pyarrow.RecordBatch` objects from the + unnamed-root Parquet file. + max_rows: + Optional maximum number of flattened element rows to yield. 
+ """ + rows_seen = 0 + for batch in batches: + if max_rows is not None and rows_seen >= max_rows: + break + list_array = batch.column(0) + # flatten() skips null outer list rows and concatenates element values + struct_values = list_array.flatten() + if max_rows is not None: + remaining = max_rows - rows_seen + if len(struct_values) > remaining: + struct_values = struct_values.slice(0, remaining) + n_values = len(struct_values) + if n_values == 0: + # Emit an empty record batch that still carries the inner schema + empty_arrays = [pa.array([], type=f.type) for f in inner_schema] + yield pa.record_batch(empty_arrays, schema=inner_schema) + continue + rows_seen += n_values + yield pa.RecordBatch.from_struct_array(struct_values) + + @staticmethod + def _flatten_arrow_struct_schema(pa, schema): + """Flatten top-level struct fields into dotted leaf fields recursively.""" + + out_fields = [] + + def _walk(field, prefix: tuple[str, ...] = (), parent_nullable: bool = False): + parts = (*prefix, field.name) + name = join_field_path(parts) + nullable = bool(parent_nullable or field.nullable) + if pa.types.is_struct(field.type): + for child in field.type: + _walk(pa.field(child.name, child.type, nullable=child.nullable), parts, nullable) + else: + out_fields.append(pa.field(name, field.type, nullable=nullable)) + + for f in schema: + _walk(f) + return pa.schema(out_fields, metadata=schema.metadata) + + @staticmethod + def _flatten_arrow_struct_batch(pa, batch, flat_schema): + arrays = [] + + def _extract(array, arr_type, parts): + if not parts: + return array + head = parts[0] + if pa.types.is_struct(arr_type): + return _extract(array.field(head), arr_type[head].type, parts[1:]) + raise KeyError("Invalid flattened path") + + for field in flat_schema: + parts = split_field_path(field.name) + col = batch.column(batch.schema.get_field_index(parts[0])) + arr = _extract(col, col.type, parts[1:]) + arrays.append(arr) + return pa.RecordBatch.from_arrays(arrays, schema=flat_schema) 
+ @classmethod - def from_arrow( + def _flatten_arrow_struct_input(cls, pa, schema, batches): + """Return flattened (schema, batches, flattened) for struct-containing Arrow inputs.""" + if not any(pa.types.is_struct(f.type) for f in schema): + return schema, batches, False + flat_schema = cls._flatten_arrow_struct_schema(pa, schema) + + def _gen(): + for b in batches: + yield cls._flatten_arrow_struct_batch(pa, b, flat_schema) + + return flat_schema, _gen(), True + + @classmethod + def from_arrow( # noqa: C901 cls, schema, batches, @@ -3612,19 +4582,37 @@ def from_arrow( auto_null_sentinels: bool = True, blosc2_batch_size: int | None = _BATCH_SIZE_DEFAULT, blosc2_items_per_block: int | None = None, + list_serializer: Literal["msgpack", "arrow"] = "msgpack", object_fallback: bool = False, column_cparams: Mapping[str, dict[str, Any]] | None = None, + separate_nested_cols: bool = False, ) -> CTable: """Build a :class:`CTable` from an Arrow schema and iterable of record batches. + **Nested struct flattening**: top-level Arrow ``struct<…>`` fields are + automatically and recursively flattened into dotted leaf columns. For + example, a field ``trip: struct>`` + becomes two CTable columns ``trip.begin.lon`` and ``trip.begin.lat``. + Each leaf is stored as an independent compressed :class:`~blosc2.NDArray`. + Row reads via ``t[i]`` reconstruct the original nested dict shape. 
Use + ``t["trip.begin.lon"]`` or ``t.trip.begin.lon`` to access a leaf:: + + import pyarrow as pa, blosc2 + trip_type = pa.struct([("begin", pa.struct([("lon", pa.float64())]))]) + schema = pa.schema([pa.field("trip", trip_type)]) + t = blosc2.CTable.from_arrow(schema, batches) + t.col_names # ['trip.begin.lon'] + t["trip.begin.lon"].mean() + t.trip.begin.lon.max() + When *string_max_length* is ``None`` (the default), scalar Arrow ``string`` / ``large_string`` columns are imported as :func:`~blosc2.vlstring` columns and ``binary`` / ``large_binary`` - columns are imported as :func:`~blosc2.vlbytes` columns. Arrow - ``struct`` columns are imported as :func:`~blosc2.struct` columns backed - by batched variable-length storage. Null values for these variable- - length scalar columns are represented as native ``None`` with no - sentinel needed. + columns are imported as :func:`~blosc2.vlbytes` columns. Non-struct + ``struct`` columns (not containing only scalar leaves) are imported as + :func:`~blosc2.struct` columns backed by batched variable-length + storage. Null values for these variable-length scalar columns are + represented as native ``None`` with no sentinel needed. When *string_max_length* is set to a positive integer, scalar string and binary columns are imported as fixed-width @@ -3639,6 +4627,10 @@ def from_arrow( schema-less ``object`` columns) are flushed to their backend. Set it to ``None`` to keep those columns pending until the final flush. + ``list_serializer`` selects the backend serializer for imported list + columns. ``"msgpack"`` is the default; ``"arrow"`` stores Arrow list + batches directly and can be much faster for deeply nested list columns. + Unsupported Arrow types raise by default. Pass ``object_fallback=True`` to import such columns as schema-less :func:`~blosc2.object` columns. This fallback is intentionally not used by :meth:`from_parquet`. 
@@ -3652,13 +4644,57 @@ def from_arrow( raise ValueError("blosc2_batch_size must be a positive integer or None") if blosc2_items_per_block is not None and blosc2_items_per_block <= 0: raise ValueError("blosc2_items_per_block must be a positive integer or None") + if list_serializer not in {"msgpack", "arrow"}: + raise ValueError("list_serializer must be 'msgpack' or 'arrow'") + + # ------------------------------------------------------------------ + # Unnamed-root list> flattening (opt-in) + # ------------------------------------------------------------------ + # When the source schema is a single unnamed "" field of type + # list>, the outer list is a physical Parquet/Awkward + # chunking artifact, not a semantic column. Flatten it so that each + # element becomes a CTable row. The struct fields become ordinary + # top-level columns and are further flattened by the struct-leaf + # machinery below. + original_root_metadata: dict | None = None + if separate_nested_cols and cls._detect_unnamed_root_list_struct(pa, schema): + inner_schema = cls._inner_schema_for_unnamed_root(pa, schema) + batches = cls._flatten_root_list_struct_batches(pa, inner_schema, batches) + schema = inner_schema + original_root_metadata = { + "kind": "unnamed_list_struct", + "field_name": "", + "preserve_grouping": False, + } + batches = iter(batches) first_batch = None table_for_inference = None + original_top_level_struct_specs: dict[str, SchemaSpec] = {} + for f in schema: + if pa.types.is_struct(f.type): + original_top_level_struct_specs[join_field_path((f.name,))] = cls._arrow_type_to_spec( + pa, f.type, nullable=f.nullable, object_fallback=object_fallback + ) if string_max_length is None or isinstance(string_max_length, Mapping): first_batch = next(batches, None) - if first_batch is not None: - table_for_inference = pa.Table.from_batches([first_batch], schema=schema) + + # Flatten top-level Arrow structs into dotted leaf columns so CTable can + # persist nested scalar leaves as physical 
columns. + flattened_structs = False + if first_batch is not None: + import itertools as _it + + schema, flat_batches, flattened_structs = cls._flatten_arrow_struct_input( + pa, schema, _it.chain([first_batch], batches) + ) + batches = iter(flat_batches) + first_batch = next(batches, None) + else: + schema, batches, flattened_structs = cls._flatten_arrow_struct_input(pa, schema, batches) + + if first_batch is not None: + table_for_inference = pa.Table.from_batches([first_batch], schema=schema) columns = cls._compiled_columns_from_arrow( pa, schema, @@ -3669,24 +4705,67 @@ def from_arrow( ) cls._apply_arrow_column_cparams(columns, column_cparams) for col in columns: - if ( - cls._is_list_column(col) and getattr(col.spec, "storage", None) == "batch" - ) or cls._is_varlen_scalar_column(col): + if cls._is_list_column(col): + if getattr(col.spec, "storage", None) == "batch": + col.spec.serializer = list_serializer + if blosc2_batch_size is not None: + col.spec.batch_rows = blosc2_batch_size + if blosc2_items_per_block is not None: + col.spec.items_per_block = blosc2_items_per_block + elif cls._is_varlen_scalar_column(col): if blosc2_batch_size is not None: col.spec.batch_rows = blosc2_batch_size if blosc2_items_per_block is not None: col.spec.items_per_block = blosc2_items_per_block + metadata = cls._arrow_schema_metadata(schema) + empty_root_physical = None + schema_meta = getattr(schema, "metadata", None) or {} + root_key = b"blosc2_empty_root_physical" + if root_key in schema_meta: + raw = schema_meta[root_key] + empty_root_physical = raw.decode() if isinstance(raw, bytes) else str(raw) + metadata["nested"] = cls._nested_metadata_from_column_names( + [col.name for col in columns], empty_root_physical=empty_root_physical + ) + if flattened_structs: + metadata["nested"]["reconstruct_rows"] = True + if original_root_metadata is not None: + metadata["nested"]["original_root"] = original_root_metadata + compiled_columns_by_name = {col.name: col for col in columns} + for 
name, spec in original_top_level_struct_specs.items(): + if name in compiled_columns_by_name: + continue + compiled_columns_by_name[name] = CompiledColumn( + name=name, + py_type=spec.python_type, + spec=spec, + dtype=getattr(spec, "dtype", None), + default=MISSING, + config=ColumnConfig(cparams=None, dparams=None, chunks=None, blocks=None), + display_width=compute_display_width(spec), + ) + compiled = CompiledSchema( row_cls=None, columns=columns, - columns_by_name={col.name: col for col in columns}, - metadata=cls._arrow_schema_metadata(schema), + columns_by_name=compiled_columns_by_name, + metadata=metadata, ) if first_batch is not None: import itertools as _it batches = _it.chain([first_batch], batches) - capacity = max(capacity_hint or 1, 1) + # Use capacity_hint to size initial NDArray chunks/blocks correctly. + # When capacity_hint is None and we are in the unnamed-root flatten path, + # fall back to _EXPECTED_SIZE_DEFAULT (1 M) so that compute_chunks_blocks + # produces a reasonable block size instead of (1,) which causes catastrophic + # storage fragmentation. For non-unnamed-root imports capacity_hint is + # always supplied by from_parquet (pf.metadata.num_rows), so the fallback + # only matters for direct from_arrow() calls without a hint. 
+ if capacity_hint is None and original_root_metadata is not None: + capacity = _EXPECTED_SIZE_DEFAULT + else: + capacity = max(capacity_hint or 1, 1) storage = cls._storage_for_arrow_import(urlpath, mode) new_cols, new_valid = cls._create_arrow_import_columns(storage, columns, capacity, cparams, dparams) storage.save_schema(schema_to_dict(compiled)) @@ -3727,7 +4806,7 @@ def to_parquet( writer.write_table(table, row_group_size=row_group_size or len(batch)) @classmethod - def from_parquet( + def from_parquet( # noqa: C901 cls, path, *, @@ -3741,6 +4820,9 @@ def from_parquet( auto_null_sentinels: bool = True, blosc2_batch_size: int | None = _BATCH_SIZE_DEFAULT, blosc2_items_per_block: int | None = None, + list_serializer: Literal["msgpack", "arrow"] = "arrow", + separate_nested_cols: bool = True, + max_rows: int | None = None, **kwargs, ) -> CTable: """Read a Parquet file into a :class:`CTable`. @@ -3751,11 +4833,24 @@ def from_parquet( This method delegates the actual table construction to :meth:`CTable.from_arrow`, so Arrow schema handling, nullable-column support, - and Blosc2 write tuning follow the same rules as that method. Top-level - Arrow ``struct<...>`` columns are imported as :func:`~blosc2.struct` - columns backed by batched variable-length storage. Unsupported Parquet - types are not silently imported as schema-less :func:`~blosc2.object` - columns; they raise so callers can decide how to handle them explicitly. + and Blosc2 write tuning follow the same rules as that method. + + **Nested struct flattening**: top-level Parquet ``struct<…>`` fields are + automatically and recursively flattened into dotted leaf columns — the same + as in :meth:`from_arrow`. For example, a Parquet file that contains a column + ``trip: struct>`` produces two CTable + columns ``trip.begin.lon`` and ``trip.begin.lat``. 
Row reads reconstruct the + original nested dict shape; individual leaves are accessed via dotted names or + attribute-chain proxies:: + + t = blosc2.CTable.from_parquet("trips.parquet") + t.col_names # e.g. ['trip.begin.lon', 'trip.begin.lat', ...] + t["trip.begin.lon"].mean() + t.trip.begin.lon.max() + + Unsupported Parquet types are not silently imported as schema-less + :func:`~blosc2.object` columns; they raise so callers can decide how to + handle them explicitly. Parameters ---------- @@ -3805,7 +4900,31 @@ def from_parquet( blosc2_items_per_block : int or None, optional Target number of items per internal Blosc2 block. Passed through to - :meth:`CTable.from_arrow`. + :meth:`CTable.from_arrow`. In general, larger number of items + favors compression ratios but make random access slower. + + list_serializer : {"msgpack", "arrow"}, optional + Serializer used for imported list columns. The default, ``"arrow"``, + stores Arrow list batches directly and is much faster for deeply nested + or ``list>`` columns. The tradeoff is that accessing those + list columns later requires PyArrow. Use ``"msgpack"`` to keep + list-column stores independent of PyArrow at read time; it can be + smaller for simple lists but is much slower and more memory-intensive + for deeply nested data. + + separate_nested_cols : bool, optional + Whether to separate qualifying nested columns during import. Defaults to + ``True``. In particular, a single unnamed top-level + ``list>`` field is treated as a root record stream: each list + element becomes a CTable row and struct leaves become ordinary nested + CTable columns. Use ``separate_nested_cols=False`` when closer fidelity to + the original Parquet row/schema shape is more important than the separated + column layout. + + max_rows : int or None, optional + Maximum number of rows to import. For ordinary Parquet files this limits + Parquet/CTable rows. 
For unnamed-root ``list>`` files imported + with ``separate_nested_cols=True``, this limits flattened element rows. **kwargs Additional keyword arguments forwarded to ``pyarrow.parquet.ParquetFile``. @@ -3824,6 +4943,8 @@ def from_parquet( If :mod:`pyarrow` is not installed. ValueError If ``batch_size`` is not greater than 0. + ValueError + If ``max_rows`` is negative. ValueError If ``columns`` contains duplicate names. Exception @@ -3863,6 +4984,8 @@ def from_parquet( pq = cls._require_pyarrow_parquet("from_parquet()") pa = cls._require_pyarrow("from_parquet()") cls._validate_arrow_batch_size(batch_size) + if max_rows is not None and max_rows < 0: + raise ValueError("max_rows must be non-negative") string_max_length = kwargs.pop("string_max_length", None) pf = pq.ParquetFile(path, **kwargs) arrow_schema = pf.schema_arrow @@ -3872,6 +4995,114 @@ def from_parquet( fields = [arrow_schema.field(name) for name in columns] arrow_schema = pa.schema(fields) batches = pf.iter_batches(batch_size=batch_size, columns=columns) + + # Parquet files generated by Awkward-style pipelines may contain an + # unnamed top-level field (""). When separate_nested_cols=True and the + # schema qualifies as an unnamed-root list>, skip the + # rename-to-root logic and pass the original schema directly to + # from_arrow, which will perform the element-level flattening. + # Otherwise, normalize empty column names to non-empty names as before. 
+ _is_unnamed_root_flatten = separate_nested_cols and cls._detect_unnamed_root_list_struct( + pa, arrow_schema + ) + if not _is_unnamed_root_flatten and any(name == "" for name in arrow_schema.names): + used = {n for n in arrow_schema.names if n} + + def _fresh_root_name() -> str: + base = "root" + if base not in used: + used.add(base) + return base + i = 1 + while True: + candidate = f"{base}_{i}" + if candidate not in used: + used.add(candidate) + return candidate + i += 1 + + original_names = list(arrow_schema.names) + renamed = [_fresh_root_name() if n == "" else n for n in original_names] + arrow_schema = pa.schema( + [arrow_schema.field(i).with_name(renamed[i]) for i in range(len(renamed))] + ) + # Preserve canonical unnamed-root intent in schema metadata. + try: + first_root = next(renamed[i] for i, old in enumerate(original_names) if old == "") + except StopIteration: + first_root = renamed[0] if renamed else "root" + current_meta = dict(arrow_schema.metadata or {}) + current_meta[b"blosc2_empty_root_physical"] = first_root.encode() + arrow_schema = arrow_schema.with_metadata(current_meta) + + def _renamed_batches(batch_iter, names): + for b in batch_iter: + yield b.rename_columns(names) + + batches = _renamed_batches(batches, renamed) + + def _limited_batches(batch_iter, limit: int): + rows_seen = 0 + for batch in batch_iter: + if rows_seen >= limit: + break + remaining = limit - rows_seen + if len(batch) > remaining: + batch = batch.slice(0, remaining) + rows_seen += len(batch) + yield batch + + # For unnamed-root flattening, max_rows applies to flattened element rows, + # not to the outer Parquet rows. Pre-flatten here when a limit is requested + # so the limit can be enforced precisely before handing batches to from_arrow. 
+ if _is_unnamed_root_flatten and max_rows is not None: + inner_schema = cls._inner_schema_for_unnamed_root(pa, arrow_schema) + limited_flat_batches = cls._flatten_root_list_struct_batches( + pa, inner_schema, batches, max_rows=max_rows + ) + ct = cls.from_arrow( + inner_schema, + limited_flat_batches, + urlpath=urlpath, + mode=mode, + cparams=cparams, + dparams=dparams, + validate=validate, + capacity_hint=max_rows, + string_max_length=string_max_length, + auto_null_sentinels=auto_null_sentinels, + blosc2_batch_size=blosc2_batch_size, + blosc2_items_per_block=blosc2_items_per_block, + list_serializer=list_serializer, + separate_nested_cols=False, + ) + nested_meta = ct._schema.metadata.get("nested", {}) + nested_meta["original_root"] = { + "kind": "unnamed_list_struct", + "field_name": "", + "preserve_grouping": False, + } + ct._schema.metadata["nested"] = nested_meta + ct._storage.save_schema(schema_to_dict(ct._schema)) + return ct + + if max_rows is not None: + batches = _limited_batches(batches, max_rows) + + # When flattening a root list>, the actual element count is not + # known ahead of time. Pass capacity_hint=None so that from_arrow falls back + # to _EXPECTED_SIZE_DEFAULT (1 M), which gives compute_chunks_blocks() a + # reasonable block size instead of the catastrophic (1, 1) produced by + # capacity=1. The CLI path computes a better estimate by sampling. 
+ if _is_unnamed_root_flatten: + _capacity_hint = None + elif pf.metadata is not None: + _capacity_hint = ( + pf.metadata.num_rows if max_rows is None else min(max_rows, pf.metadata.num_rows) + ) + else: + _capacity_hint = max_rows + return cls.from_arrow( arrow_schema, batches, @@ -3880,11 +5111,13 @@ def from_parquet( cparams=cparams, dparams=dparams, validate=validate, - capacity_hint=pf.metadata.num_rows if pf.metadata is not None else None, + capacity_hint=_capacity_hint, string_max_length=string_max_length, auto_null_sentinels=auto_null_sentinels, blosc2_batch_size=blosc2_batch_size, blosc2_items_per_block=blosc2_items_per_block, + list_serializer=list_serializer, + separate_nested_cols=separate_nested_cols, ) # ------------------------------------------------------------------ @@ -4250,6 +5483,16 @@ def rename_column(self, old: str, new: str) -> None: On disk tables the corresponding persisted column leaf is renamed. + Renaming a flat column to a dotted name (e.g. ``"trip.begin.lon"``) + promotes it to a nested leaf column: it will be stored under the + hierarchical path ``/_cols/trip/begin/lon`` on disk and can be + accessed via ``t["trip.begin.lon"]`` or the attribute-chain proxy + ``t.trip.begin.lon``. 
This is the primary way to define nested + columns when importing from non-Arrow sources:: + + t.rename_column("trip_begin_lon", "trip.begin.lon") + t["trip.begin.lon"].mean() # works as a regular Column + Raises ------ ValueError @@ -4387,7 +5630,9 @@ def _fetch_col_at_positions(self, name: str, positions: np.ndarray): ) col = self._cols[name] spec = self._schema.columns_by_name[name].spec - if self._is_list_spec(spec) or isinstance(spec, (VLStringSpec, VLBytesSpec, StructSpec, ObjectSpec)): + if self._is_list_spec(spec) or isinstance( + spec, (VLStringSpec, VLBytesSpec, StructSpec, ObjectSpec, DictionarySpec) + ): return col[positions] values = col[positions] if isinstance(spec, timestamp): @@ -4848,7 +6093,11 @@ def _structured_array_dtype(self) -> np.dtype: col_info = self._schema.columns_by_name.get(name) if col_info is None: dtype = np.asarray(self[name][:0]).dtype - elif self._is_list_column(col_info) or self._is_varlen_scalar_column(col_info): + elif ( + self._is_list_column(col_info) + or self._is_varlen_scalar_column(col_info) + or self._is_dictionary_column(col_info) + ): dtype = np.dtype(object) else: dtype = col_info.dtype if col_info.dtype is not None else np.dtype(object) @@ -4867,6 +6116,36 @@ def __array__(self, dtype=None, copy=None): arr = arr.astype(dtype, copy=True if copy is None else copy) return arr.copy() if copy else arr + def _logical_to_physical_name(self, name: str) -> str: + """Resolve a user/logical column path to a stored physical column name.""" + if name in self._cols or name in self._computed_cols: + return name + nested = self._schema.metadata.get("nested") if self._schema.metadata else None + if isinstance(nested, dict): + mapping = nested.get("logical_to_physical") + if isinstance(mapping, dict): + physical = mapping.get(name) + if isinstance(physical, str) and (physical in self._cols or physical in self._computed_cols): + return physical + return name + + def _expand_logical_column_selector(self, name: str) -> list[str]: + 
"""Resolve one logical selector to one or more physical column names. + + If *name* points to a scalar leaf, returns ``[leaf]``. If it points to + a struct-like prefix (e.g. ``"trip"``), expands to descendant leaves. + """ + physical = self._logical_to_physical_name(name) + if physical in self._cols or physical in self._computed_cols: + return [physical] + prefix_parts = split_field_path(physical) + expanded = [ + col for col in self.col_names if split_field_path(col)[: len(prefix_parts)] == prefix_parts + ] + if expanded: + return expanded + return [physical] + def __getitem__(self, key): """Type-driven indexing for columns, rows, projections, and filters. @@ -4874,7 +6153,10 @@ def __getitem__(self, key): - ``str``: return a :class:`Column` when it matches a stored or computed column name; otherwise evaluate it as a boolean expression via - :meth:`where`. + :meth:`where`. Dotted names (e.g. ``"trip.begin.lon"``) select + nested leaf columns directly; a struct-prefix name + (e.g. ``"trip.begin"``) that matches multiple descendant leaves returns + a :class:`_StructPathColumn` view. - boolean :class:`blosc2.LazyExpr` or :class:`blosc2.NDArray`: return the same filtered view as :meth:`where`, e.g. ``t[t.temperature_f > 70]``. - ``int``: return one live row as a namedtuple-like object. 
@@ -4902,10 +6184,20 @@ def __getitem__(self, key): Project columns:: slim = t[["sensor_id", "temperature_f"]] + + Access a nested leaf column with a dotted name or an attribute chain:: + + lons = t["trip.begin.lon"] # Column for the nested leaf + lons = t.trip.begin.lon # equivalent attribute-chain form """ if isinstance(key, str): - if key in self._cols or key in self._computed_cols: - return Column(self, key) + physical = self._logical_to_physical_name(key) + if physical in self._cols or physical in self._computed_cols: + return Column(self, physical) + expanded = self._expand_logical_column_selector(key) + cc = self._schema.columns_by_name.get(physical) + if len(expanded) > 1 or (expanded and cc is not None and isinstance(cc.spec, StructSpec)): + return _StructPathColumn(self, physical, expanded) return self.where(key) if isinstance(key, (blosc2.NDArray, blosc2.LazyExpr)) and getattr(key, "dtype", None) == np.bool_: return self.where(key) @@ -4913,9 +6205,21 @@ def __getitem__(self, key): raise TypeError("Tuple indexing is not supported for CTable in V1") return self._getitem_row_selector(key) + def _nested_namespace(self, prefix: str): + prefix_parts = split_field_path(prefix) + for name in self.col_names: + parts = split_field_path(name) + if parts[: len(prefix_parts)] == prefix_parts and len(parts) > len(prefix_parts): + return _NestedColumnNamespace(self, prefix) + return None + def __getattr__(self, s: str): - if s in self._cols or s in self._computed_cols: - return Column(self, s) + physical = self._logical_to_physical_name(s) + if physical in self._cols or physical in self._computed_cols: + return Column(self, physical) + ns = self._nested_namespace(s) + if ns is not None: + return ns return super().__getattribute__(s) # ------------------------------------------------------------------ @@ -4959,6 +6263,11 @@ def compact(self): replacement.flush() self._cols[name] = replacement continue + if self._is_dictionary_column(col): + # Keep dictionary values 
intact; just compact the codes. + live_codes = np.asarray(v.codes[real_poss[: self._n_rows]], dtype=np.int32) + v.codes[: self._n_rows] = live_codes + continue start = 0 block_size = self._valid_rows.blocks[0] end = min(block_size, self._n_rows) @@ -4980,6 +6289,16 @@ def _normalise_sort_keys( """Validate and normalise sort key arguments; return (cols, ascending).""" if isinstance(cols, str): cols = [cols] + + resolved_cols: list[str] = [] + for name in cols: + expanded = self._expand_logical_column_selector(name) + if len(expanded) != 1: + raise ValueError( + f"Sort key {name!r} resolves to multiple columns {expanded!r}; please choose a leaf column." + ) + resolved_cols.append(expanded[0]) + cols = resolved_cols if isinstance(ascending, bool): ascending = [ascending] * len(cols) if len(cols) != len(ascending): @@ -5094,7 +6413,13 @@ def _build_lex_keys( # Materialise computed column values at live positions raw = np.asarray(cc["lazy"][:])[live_pos] else: - raw = self._cols[name][live_pos] + col_info = self._schema.columns_by_name.get(name) + if col_info is not None and self._is_dictionary_column(col_info): + # Sort dictionary columns by decoded string values. + decoded = self._cols[name][live_pos] + raw = np.array(decoded, dtype=object) + else: + raw = self._cols[name][live_pos] col_info = self._schema.columns_by_name.get(name) nv = getattr(col_info.spec, "null_value", None) if col_info else None @@ -5136,7 +6461,12 @@ def sort_by( cols: Column name or list of column names to sort by. When multiple columns are given, the first is the primary key, the second is - the tiebreaker, and so on. + the tiebreaker, and so on. For tables with **nested (dotted) + column names**, pass the dotted leaf name directly:: + + t.sort_by("trip.begin.lon") + t.sort_by(["trip.begin.lon", "payment.fare"], ascending=[True, False]) + ascending: Sort direction. A single bool applies to all keys; a list must have the same length as *cols*. 
@@ -5186,37 +6516,52 @@ def sort_by( sorted_pos = live_pos[order] if inplace: - for col in self._schema.columns: - arr = self._cols[col.name] - if self._is_list_column(col): - new_arr = ListArray(spec=col.spec) - new_arr.extend((arr[int(pos)] for pos in sorted_pos), validate=False) - new_arr.flush() - self._cols[col.name] = new_arr - else: - arr[:n] = arr[sorted_pos] - self._valid_rows[:n] = True - self._valid_rows[n:] = False - self._n_rows = n - self._last_pos = n - self._mark_all_indexes_stale() + self._sort_by_inplace(sorted_pos, n) return self - else: - # Build a new in-memory table with the sorted rows - result = self._empty_copy() - for col in self._schema.columns: - col_name = col.name - arr = self._cols[col_name] - if self._is_list_column(col): - result._cols[col_name].extend((arr[int(pos)] for pos in sorted_pos), validate=False) - result._cols[col_name].flush() - else: - result._cols[col_name][:n] = arr[sorted_pos] - result._valid_rows[:n] = True - result._valid_rows[n:] = False - result._n_rows = n - result._last_pos = n - return result + + return self._sorted_copy_from_positions(sorted_pos, n) + + def _sort_by_inplace(self, sorted_pos: np.ndarray, n: int) -> None: + for col in self._schema.columns: + arr = self._cols[col.name] + if self._is_list_column(col): + new_arr = ListArray(spec=col.spec) + new_arr.extend((arr[int(pos)] for pos in sorted_pos), validate=False) + new_arr.flush() + self._cols[col.name] = new_arr + elif self._is_dictionary_column(col): + sorted_codes = np.asarray(arr.codes[sorted_pos], dtype=np.int32) + arr.codes[:n] = sorted_codes + else: + arr[:n] = arr[sorted_pos] + self._valid_rows[:n] = True + self._valid_rows[n:] = False + self._n_rows = n + self._last_pos = n + self._mark_all_indexes_stale() + + def _sorted_copy_from_positions(self, sorted_pos: np.ndarray, n: int) -> CTable: + # Build a new in-memory table with the sorted rows + result = self._empty_copy() + for col in self._schema.columns: + col_name = col.name + arr = 
self._cols[col_name] + if self._is_list_column(col): + result._cols[col_name].extend((arr[int(pos)] for pos in sorted_pos), validate=False) + result._cols[col_name].flush() + elif self._is_dictionary_column(col): + # Copy dictionary values, then sorted codes. + for v in arr.dictionary: + result._cols[col_name].encode(v) + sorted_codes = np.asarray(arr.codes[sorted_pos], dtype=np.int32) + result._cols[col_name].codes[:n] = sorted_codes + else: + result._cols[col_name][:n] = arr[sorted_pos] + result._valid_rows[:n] = True + result._valid_rows[n:] = False + result._n_rows = n + result._last_pos = n + return result def copy( self, @@ -5287,6 +6632,13 @@ def copy( src = (arr[int(pos)] for pos in live_pos) if compact else (arr[i] for i in range(n)) result._cols[col_name].extend(src, validate=False) result._cols[col_name].flush() + elif self._is_dictionary_column(col): + # Copy dictionary values, then copy (live) codes. + for v in arr.dictionary: + result._cols[col_name].encode(v) + pos_slice = live_pos if compact else np.arange(n, dtype=np.int64) + raw_codes = np.asarray(arr.codes[pos_slice], dtype=np.int32) + result._cols[col_name].codes[:n] = raw_codes else: result._cols[col_name][:n] = arr[live_pos] if compact else arr[:n] @@ -5324,6 +6676,20 @@ def _empty_copy(self, capacity: int | None = None) -> CTable: cparams=col_storage.get("cparams"), dparams=col_storage.get("dparams"), ) + elif self._is_varlen_scalar_column(col): + new_cols[col.name] = mem_storage.create_varlen_scalar_column( + col.name, + spec=col.spec, + cparams=col_storage.get("cparams"), + dparams=col_storage.get("dparams"), + ) + elif self._is_dictionary_column(col): + new_cols[col.name] = mem_storage.create_dictionary_column( + col.name, + spec=col.spec, + cparams=col_storage.get("cparams"), + dparams=col_storage.get("dparams"), + ) else: new_cols[col.name] = mem_storage.create_column( col.name, @@ -5643,6 +7009,7 @@ def _resolve_index_catalog_entry( if col_name is not None and expression is not None: 
raise ValueError("col_name and expression are mutually exclusive") if col_name is not None: + col_name = self._logical_to_physical_name(col_name) if col_name not in catalog: raise KeyError(f"No index found for column {col_name!r}.") return col_name, catalog[col_name] @@ -5822,7 +7189,14 @@ def create_index( # noqa: C901 tmpdir: str | None = None, **kwargs, ) -> blosc2.Index: - """Build and register an index for a stored column or table expression.""" + """Build and register an index for a stored column or table expression. + + For tables with **nested (dotted) column names**, pass the dotted leaf + name directly:: + + t.create_index("trip.begin.lon") + t.where("trip.begin.lon > -87.7").nrows # index is used automatically + """ if self.base is not None: raise ValueError("Cannot create an index on a view.") if col_name is not None and field is not None: @@ -5832,6 +7206,8 @@ def create_index( # noqa: C901 if operands is not None and expression is None: raise ValueError("operands can only be provided together with expression") col_name = field if field is not None else col_name + if col_name is not None: + col_name = self._logical_to_physical_name(col_name) from blosc2.indexing import ( _IN_MEMORY_INDEXES, @@ -5920,6 +7296,10 @@ def create_index( # noqa: C901 f"Cannot create an index on variable-length scalar column {col_name!r}: " "indexing for vlstring/vlbytes/struct/object columns is not supported yet." ) + # Dictionary columns: index the underlying int32 codes array. 
+ is_dictionary = isinstance(self._schema.columns_by_name[col_name].spec, DictionarySpec) + if is_dictionary: + col_arr = col_arr.codes # index the int32 codes NDArray is_persistent = self._storage.index_anchor_path(col_name) is not None if is_persistent: @@ -6303,6 +7683,9 @@ def info_items(self) -> list[tuple[str, object]]: @staticmethod def _dtype_info_label(dtype: np.dtype | None, spec: SchemaSpec | None = None) -> str: """Return a compact dtype label for info reports.""" + if isinstance(spec, DictionarySpec): + ordered_tag = ", ordered" if spec.ordered else "" + return f"dictionary[str{ordered_tag}]" if isinstance(spec, VLStringSpec): return "vlstring" if isinstance(spec, VLBytesSpec): @@ -6370,6 +7753,19 @@ def append(self, data: list | np.void | np.ndarray) -> None: Materialized columns whose values are omitted are auto-filled from their recorded expression. Raises ``ValueError`` if the table is read-only or a view. + + For tables with **nested (dotted) column names** the row dict may be + supplied either as a flat mapping of dotted keys or as a nested dict + that mirrors the original struct shape — both are accepted and + automatically flattened to the physical dotted leaf names:: + + # flat dotted keys + t.append({"trip.begin.lon": -87.6, "trip.begin.lat": 41.8, + "payment.fare": 12.5}) + + # original nested dict (auto-flattened) + t.append({"trip": {"begin": {"lon": -87.6, "lat": 41.8}}, + "payment": {"fare": 12.5}}) """ if self._read_only: raise ValueError("Table is read-only (opened with mode='r').") @@ -6395,12 +7791,15 @@ def append(self, data: list | np.void | np.ndarray) -> None: col_array = self._cols[name] if self._is_list_column(col) or self._is_varlen_scalar_column(col): col_array.append(row[name]) + elif self._is_dictionary_column(col): + col_array[pos] = row[name] # DictionaryColumn encodes on __setitem__ else: col_array[pos] = row[name] + n_rows = self.nrows self._valid_rows[pos] = True self._last_pos = pos + 1 - self._n_rows += 1 + 
self._n_rows = n_rows + 1 self._mark_all_indexes_stale() def delete(self, ind: int | slice | str | Iterable) -> None: @@ -6426,10 +7825,11 @@ def delete(self, ind: int | slice | str | Iterable) -> None: false_pos = true_pos[ind] n_deleted = len(np.unique(false_pos)) + n_rows = self.nrows valid_rows_np[false_pos] = False self._valid_rows[:] = valid_rows_np # write back in-place; no new array created - self._n_rows -= n_deleted + self._n_rows = n_rows - n_deleted if self._last_pos is None or np.any(false_pos == self._last_pos - 1): self._last_pos = None # last live row deleted; recalculate on next write self._storage.bump_visibility_epoch() @@ -6447,6 +7847,22 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) -> Pass ``validate=False`` to skip per-row Pydantic validation on trusted bulk imports. Raises ``ValueError`` if the table is read-only or a view. + + For tables with **nested (dotted) column names** both the dict-of-arrays + and list-of-dicts forms accept the original nested dict shape and + auto-flatten it to physical dotted leaf names:: + + # nested dict of arrays + t.extend({ + "trip": {"begin": {"lon": lons, "lat": lats}}, + "payment": {"fare": fares}, + }) + + # list of nested dicts + t.extend([ + {"trip": {"begin": {"lon": -87.6, "lat": 41.8}}, "payment": {"fare": 12.5}}, + {"trip": {"begin": {"lon": -87.5, "lat": 41.7}}, "payment": {"fare": 8.0}}, + ]) """ if self._read_only: raise ValueError("Table is read-only (opened with mode='r').") @@ -6476,6 +7892,8 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) -> provided_names.add(name) else: if isinstance(data, dict): + if any(isinstance(v, dict) for v in data.values()): + data = self._flatten_nested_dict(data) known_names = [name for name in current_col_names if name in data] if not known_names: raise ValueError("No known stored columns provided for extend().") @@ -6499,6 +7917,26 @@ def extend(self, data: list | CTable | Any, *, validate: bool | 
None = None) -> new_nrows = len(data) raw_columns = {name: data[name] for name in data.dtype.names if name in current_col_names} provided_names = set(raw_columns) + elif data and isinstance(data[0], dict): + # List of dicts: flatten any nested dicts and pivot to column arrays. + flat_rows = [ + self._flatten_nested_dict(row) if any(isinstance(v, dict) for v in row.values()) else row + for row in data + ] + new_nrows = len(flat_rows) + col_set = set(input_col_names) + raw_columns = { + name: [row[name] for row in flat_rows] + for name in input_col_names + if name in flat_rows[0] + } + provided_names = set(raw_columns) + # Fill any remaining columns from the rows (may include extra keys) + for row in flat_rows: + for key in row: + if key in col_set and key not in raw_columns: + raw_columns[key] = [r.get(key) for r in flat_rows] + provided_names.add(key) else: new_nrows = len(data) batch_columns = list(zip(*data, strict=False)) @@ -6522,12 +7960,15 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) -> scalar_processed_cols: dict[str, blosc2.NDArray] = {} list_processed_cols: dict[str, list] = {} varlen_scalar_processed_cols: dict[str, list] = {} + dict_processed_cols: dict[str, list] = {} for name in current_col_names: col_meta = self._schema.columns_by_name[name] if self._is_list_column(col_meta): list_processed_cols[name] = list(raw_columns[name]) elif self._is_varlen_scalar_column(col_meta): varlen_scalar_processed_cols[name] = list(raw_columns[name]) + elif self._is_dictionary_column(col_meta): + dict_processed_cols[name] = list(raw_columns[name]) else: target_dtype = self._cols[name].dtype if isinstance(col_meta.spec, timestamp): @@ -6568,12 +8009,16 @@ def extend(self, data: list | CTable | Any, *, validate: bool | None = None) -> self._cols[name].extend(list_processed_cols[name], validate=do_validate) elif self._is_varlen_scalar_column(col_meta): self._cols[name].extend(varlen_scalar_processed_cols[name]) + elif 
self._is_dictionary_column(col_meta): + # DictionaryColumn.__setitem__ with a slice encodes all values. + self._cols[name][start_pos:end_pos] = dict_processed_cols[name] else: self._cols[name][start_pos:end_pos] = scalar_processed_cols[name][:] + n_rows = self.nrows self._valid_rows[start_pos:end_pos] = True self._last_pos = end_pos - self._n_rows += new_nrows + self._n_rows = n_rows + new_nrows self._mark_all_indexes_stale() # ------------------------------------------------------------------ @@ -6584,11 +8029,41 @@ def _where_expression_operands(self) -> dict[str, blosc2.NDArray | blosc2.LazyEx operands = {} for name, arr in self._cols.items(): col = self._schema.columns_by_name.get(name) - if col is not None and not (self._is_list_column(col) or self._is_varlen_scalar_column(col)): + if col is not None and not ( + self._is_list_column(col) + or self._is_varlen_scalar_column(col) + or self._is_dictionary_column(col) + ): operands[name] = arr operands.update({name: cc["lazy"] for name, cc in self._computed_cols.items()}) return operands + def _rewrite_nested_expression( + self, expr: str, operands: dict[str, blosc2.NDArray | blosc2.LazyExpr] + ) -> tuple[str, dict[str, blosc2.NDArray | blosc2.LazyExpr]]: + """Rewrite dotted nested names in *expr* to safe identifiers. + + `blosc2.lazyexpr` does not accept dotted identifiers, but nested leaf + columns are naturally addressed as dotted paths (e.g. ``trip.begin.lon``). + This maps them to temporary aliases and returns rewritten expression and + operand mapping. + """ + dotted = [name for name in operands if "." in name] + if not dotted: + return expr, operands + + rewritten = expr + new_operands = dict(operands) + # Longest names first so trip.begin.lon is rewritten before trip.begin. + for i, name in enumerate(sorted(dotted, key=len, reverse=True)): + alias = f"__nf{i}" + pattern = rf"(? 
None: for col in self._schema.columns: if self._is_varlen_scalar_column(col) and re.search( @@ -6664,6 +8139,12 @@ def where( view = t.where((t["unit price"] * t["quantity"]) > 100) + For tables with **nested (dotted) column names**, dotted leaf names and + attribute-chain proxies work in both string and expression forms:: + + view = t.where("trip.begin.lon > -87.7 and payment.fare > 10") + view = t.where(t.trip.begin.lon > -87.7) + Notes ----- Use bitwise operators (``&``, ``|``, ``~``) or string expressions for @@ -6684,7 +8165,9 @@ def where( """ if isinstance(expr_result, str): self._guard_varlen_scalar_expression(expr_result) - expr_result = blosc2.lazyexpr(expr_result, self._where_expression_operands()) + operands = self._where_expression_operands() + expr_result, operands = self._rewrite_nested_expression(expr_result, operands) + expr_result = blosc2.lazyexpr(expr_result, operands) if isinstance(expr_result, np.ndarray) and expr_result.dtype == np.bool_: expr_result = blosc2.asarray(expr_result) if isinstance(expr_result, Column): diff --git a/src/blosc2/ctable_storage.py b/src/blosc2/ctable_storage.py index fab3ac11..145e161b 100644 --- a/src/blosc2/ctable_storage.py +++ b/src/blosc2/ctable_storage.py @@ -28,6 +28,7 @@ import blosc2 from blosc2.batch_array import BatchArray +from blosc2.dictionary_column import DictionaryColumn from blosc2.list_array import ListArray from blosc2.scalar_array import ( _make_persistent_backend, @@ -94,6 +95,19 @@ def create_varlen_scalar_column( def open_varlen_scalar_column(self, name: str, spec) -> _ScalarVarLenArray: raise NotImplementedError + def create_dictionary_column( + self, + name: str, + *, + spec, + cparams: dict[str, Any] | None = None, + dparams: dict[str, Any] | None = None, + ) -> DictionaryColumn: + raise NotImplementedError + + def open_dictionary_column(self, name: str, spec) -> DictionaryColumn: + raise NotImplementedError + def create_valid_rows( self, *, @@ -206,6 +220,17 @@ def 
create_varlen_scalar_column(self, name, *, spec, cparams=None, dparams=None) def open_varlen_scalar_column(self, name, spec): raise RuntimeError("In-memory tables have no on-disk representation to open.") + def create_dictionary_column(self, name, *, spec, cparams=None, dparams=None): + from blosc2.schema import VLStringSpec + + chunks, blocks = (4096,), (256,) + codes = blosc2.zeros((4096,), dtype=np.int32, chunks=chunks, blocks=blocks) + dict_store = _ScalarVarLenArray(VLStringSpec(nullable=False)) + return DictionaryColumn(spec, codes, dict_store) + + def open_dictionary_column(self, name, spec): + raise RuntimeError("In-memory tables have no on-disk representation to open.") + def create_valid_rows(self, *, shape, chunks, blocks): return blosc2.zeros(shape, dtype=np.bool_, chunks=chunks, blocks=blocks) @@ -268,6 +293,63 @@ def index_anchor_path(self, col_name: str) -> str | None: _COLS_DIR = "_cols" +def split_field_path(path: str) -> tuple[str, ...]: + """Split a dotted logical field path into segments. + + A backslash escapes separator characters, so ``"a\\.b.c"`` means the + two-segment path ``("a.b", "c")``. The empty string is the canonical root. + """ + if path == "": + return () + parts: list[str] = [] + buf: list[str] = [] + escaped = False + for ch in path: + if escaped: + buf.append(ch) + escaped = False + elif ch == "\\": + escaped = True + elif ch == ".": + parts.append("".join(buf)) + buf = [] + else: + buf.append(ch) + if escaped: + buf.append("\\") + parts.append("".join(buf)) + return tuple(parts) + + +def join_field_path(parts: tuple[str, ...] 
| list[str]) -> str: + """Join logical path segments using dot syntax with backslash escaping.""" + escaped_parts = [] + for part in parts: + buf: list[str] = [] + for ch in part: + if ch in {"\\", ".", "/"}: + buf.append("\\") + buf.append(ch) + escaped_parts.append("".join(buf)) + return ".".join(escaped_parts) + + +def _encode_storage_segment(segment: str) -> str: + """Percent-encode characters that are structural in logical/storage paths.""" + return segment.replace("%", "%25").replace("/", "%2F").replace(".", "%2E").replace("\\", "%5C") + + +def _column_name_to_relpath(name: str) -> str: + """Map a logical column name to a hierarchical path under ``_cols``. + + Unescaped dots are interpreted as nested path separators + (``a.b.c`` -> ``a/b/c``). Literal dots/slashes/backslashes in field names + can be represented with :func:`join_field_path` and are percent-encoded in + the physical storage path. + """ + return "/".join(_encode_storage_segment(part) for part in split_field_path(name)) + + class FileTableStorage(TableStorage): """Arrays stored as TreeStore leaves inside *urlpath*. @@ -281,13 +363,13 @@ class FileTableStorage(TableStorage): ``'r'`` — open existing read-only. """ - def __init__(self, urlpath: str, mode: str) -> None: + def __init__(self, urlpath: str, mode: str, store: blosc2.TreeStore | None = None) -> None: if mode not in ("r", "a", "w"): raise ValueError(f"mode must be 'r', 'a', or 'w'; got {mode!r}") self._root = urlpath self._mode = mode self._meta: blosc2.SChunk | None = None - self._store: blosc2.TreeStore | None = None + self._store: blosc2.TreeStore | None = store # ------------------------------------------------------------------ # Key helpers @@ -310,8 +392,13 @@ def _list_col_path(self, name: str) -> str: # For .b2d, working_dir == self._root, so behaviour is unchanged. 
return os.path.join(self._open_store().working_dir, rel_key + ".b2b") + def _dict_col_path(self, name: str) -> str: + """Path for the dictionary values store of a dictionary column.""" + rel_key = self._col_key(name).lstrip("/") + return os.path.join(self._open_store().working_dir, rel_key + "_dict.b2b") + def _col_key(self, name: str) -> str: - return f"/{_COLS_DIR}/{name}" + return f"/{_COLS_DIR}/{_column_name_to_relpath(name)}" def _key_to_path(self, key: str) -> str: rel_key = key.lstrip("/") @@ -366,13 +453,14 @@ def create_list_column(self, name, *, spec, cparams, dparams): kwargs["cparams"] = cparams if dparams is not None: kwargs["dparams"] = dparams + os.makedirs(os.path.dirname(self._list_col_path(name)), exist_ok=True) return ListArray(spec=spec, **kwargs) def open_list_column(self, name: str) -> ListArray: store = self._open_store() if store.is_zip_store and self._mode == "r": # In read mode, .b2z is never extracted — read the member at its zip offset directly. - rel = f"{_COLS_DIR}/{name}.b2b" + rel = self._col_key(name).lstrip("/") + ".b2b" if rel not in store.offsets: raise KeyError(f"List column {name!r} not found in {self._root!r}") opened = blosc2.blosc2_ext.open(store.b2z_path, mode="r", offset=store.offsets[rel]["offset"]) @@ -388,7 +476,7 @@ def open_varlen_scalar_column(self, name: str, spec) -> _ScalarVarLenArray: store = self._open_store() path = self._list_col_path(name) if store.is_zip_store and self._mode == "r": - rel = f"{_COLS_DIR}/{name}.b2b" + rel = self._col_key(name).lstrip("/") + ".b2b" if rel not in store.offsets: raise KeyError(f"Varlen scalar column {name!r} not found in {self._root!r}") backend = BatchArray( @@ -401,6 +489,47 @@ def open_varlen_scalar_column(self, name: str, spec) -> _ScalarVarLenArray: _validate_role_metadata(backend, spec) return _ScalarVarLenArray(spec, backend) + def create_dictionary_column(self, name, *, spec, cparams=None, dparams=None) -> DictionaryColumn: + from blosc2.schema import VLStringSpec + + # 
Codes: stored as a regular NDArray under _cols/name + codes = self.create_column( + name, + dtype=np.int32, + shape=(4096,), + chunks=(4096,), + blocks=(256,), + cparams=cparams, + dparams=dparams, + ) + # Dictionary values: stored as a varlen scalar (vlstring) at name_dict.b2b + dict_spec = VLStringSpec(nullable=False) + dict_path = self._dict_col_path(name) + dict_backend = _make_persistent_backend(dict_spec, dict_path, "w") + dict_store = _ScalarVarLenArray(dict_spec, dict_backend) + return DictionaryColumn(spec, codes, dict_store) + + def open_dictionary_column(self, name: str, spec) -> DictionaryColumn: + from blosc2.schema import VLStringSpec + + codes = self.open_column(name) + dict_spec = VLStringSpec(nullable=False) + store = self._open_store() + dict_path = self._dict_col_path(name) + if store.is_zip_store and self._mode == "r": + rel = self._col_key(name).lstrip("/") + "_dict.b2b" + if rel not in store.offsets: + raise KeyError(f"Dictionary column dict store {name!r} not found in {self._root!r}") + dict_backend = BatchArray( + _from_schunk=blosc2.blosc2_ext.open( + store.b2z_path, mode="r", offset=store.offsets[rel]["offset"] + ) + ) + else: + dict_backend = _open_persistent_backend(dict_path, self._mode, spec=dict_spec) + dict_store = _ScalarVarLenArray(dict_spec, dict_backend) + return DictionaryColumn(spec, codes, dict_store) + def create_valid_rows(self, *, shape, chunks, blocks): valid_rows = blosc2.zeros( shape, @@ -685,9 +814,12 @@ def _open_leaf(self, logical_key: str) -> Any: full_key = self._table_key(logical_key) return DictStore.__getitem__(self._store, full_key) + def _col_logical_key(self, name: str) -> str: + return f"/{_COLS_DIR}/{_column_name_to_relpath(name)}" + def _list_col_path(self, name: str) -> str: """Filesystem path for a list-style column (``.b2b``).""" - return self._dest_path(f"/_cols/{name}", ".b2b") + return self._dest_path(self._col_logical_key(name), ".b2b") # 
------------------------------------------------------------------ # TableStorage interface — lifecycle @@ -735,16 +867,16 @@ def create_column( kwargs["cparams"] = cparams if dparams is not None: kwargs["dparams"] = dparams - dest_path = self._dest_path(f"/_cols/{name}", ".b2nd") + dest_path = self._dest_path(self._col_logical_key(name), ".b2nd") os.makedirs(os.path.dirname(dest_path), exist_ok=True) col = blosc2.zeros(shape, dtype=dtype, urlpath=dest_path, mode="w", **kwargs) rel_path = os.path.relpath(dest_path, self._working_dir()).replace(os.sep, "/") - self._store.map_tree[self._table_key(f"/_cols/{name}")] = rel_path + self._store.map_tree[self._table_key(self._col_logical_key(name))] = rel_path self._store._modified = True return col def open_column(self, name: str) -> blosc2.NDArray: - return self._open_leaf(f"/_cols/{name}") + return self._open_leaf(self._col_logical_key(name)) def create_list_column( self, @@ -768,7 +900,7 @@ def create_list_column( def open_list_column(self, name: str) -> ListArray: if self._store.is_zip_store and self._mode == "r": - rel = self._table_key(f"/_cols/{name}").lstrip("/") + ".b2b" + rel = self._table_key(self._col_logical_key(name)).lstrip("/") + ".b2b" if rel not in self._store.offsets: raise KeyError(f"List column {name!r} not found in {self._store.localpath!r}") opened = blosc2.blosc2_ext.open( @@ -793,7 +925,7 @@ def create_varlen_scalar_column( def open_varlen_scalar_column(self, name: str, spec) -> _ScalarVarLenArray: if self._store.is_zip_store and self._mode == "r": - rel = self._table_key(f"/_cols/{name}").lstrip("/") + ".b2b" + rel = self._table_key(self._col_logical_key(name)).lstrip("/") + ".b2b" if rel not in self._store.offsets: raise KeyError(f"Varlen scalar column {name!r} not found in {self._store.localpath!r}") backend = BatchArray( @@ -808,6 +940,59 @@ def open_varlen_scalar_column(self, name: str, spec) -> _ScalarVarLenArray: _validate_role_metadata(backend, spec) return _ScalarVarLenArray(spec, 
backend) + def _dict_col_path(self, name: str) -> str: + """Path for the dictionary values store of a dictionary column.""" + return self._dest_path(self._col_logical_key(name), "_dict.b2b") + + def create_dictionary_column( + self, + name: str, + *, + spec, + cparams=None, + dparams=None, + ) -> DictionaryColumn: + from blosc2.schema import VLStringSpec + + codes = self.create_column( + name, + dtype=np.int32, + shape=(4096,), + chunks=(4096,), + blocks=(256,), + cparams=cparams, + dparams=dparams, + ) + dict_spec = VLStringSpec(nullable=False) + dict_path = self._dict_col_path(name) + os.makedirs(os.path.dirname(dict_path), exist_ok=True) + dict_backend = _make_persistent_backend(dict_spec, dict_path, "w") + dict_store = _ScalarVarLenArray(dict_spec, dict_backend) + return DictionaryColumn(spec, codes, dict_store) + + def open_dictionary_column(self, name: str, spec) -> DictionaryColumn: + from blosc2.schema import VLStringSpec + + codes = self.open_column(name) + dict_spec = VLStringSpec(nullable=False) + if self._store.is_zip_store and self._mode == "r": + rel = self._table_key(self._col_logical_key(name)).lstrip("/") + "_dict.b2b" + if rel not in self._store.offsets: + raise KeyError( + f"Dictionary column dict store {name!r} not found in {self._store.localpath!r}" + ) + dict_backend = BatchArray( + _from_schunk=blosc2.blosc2_ext.open( + self._store.b2z_path, + mode="r", + offset=self._store.offsets[rel]["offset"], + ) + ) + else: + dict_backend = _open_persistent_backend(self._dict_col_path(name), self._mode, spec=dict_spec) + dict_store = _ScalarVarLenArray(dict_spec, dict_backend) + return DictionaryColumn(spec, codes, dict_store) + def create_valid_rows( self, *, @@ -876,7 +1061,7 @@ def column_names_from_schema(self) -> list[str]: return [c["name"] for c in self.load_schema()["columns"]] def delete_column(self, name: str) -> None: - full_key = self._table_key(f"/_cols/{name}") + full_key = self._table_key(self._col_logical_key(name)) if full_key in 
self._store.map_tree: filepath = self._store.map_tree.pop(full_key) full_path = os.path.join(self._working_dir(), filepath) @@ -890,10 +1075,10 @@ def delete_column(self, name: str) -> None: raise KeyError(name) def rename_column(self, old: str, new: str) -> blosc2.NDArray: - old_key = self._table_key(f"/_cols/{old}") - new_key = self._table_key(f"/_cols/{new}") + old_key = self._table_key(self._col_logical_key(old)) + new_key = self._table_key(self._col_logical_key(new)) if old_key in self._store.map_tree: - new_dest = self._dest_path(f"/_cols/{new}", ".b2nd") + new_dest = self._dest_path(self._col_logical_key(new), ".b2nd") old_dest = os.path.join(self._working_dir(), self._store.map_tree[old_key]) os.makedirs(os.path.dirname(new_dest), exist_ok=True) os.replace(old_dest, new_dest) diff --git a/src/blosc2/dict_store.py b/src/blosc2/dict_store.py index 4a8ff2e1..7a7b38ab 100644 --- a/src/blosc2/dict_store.py +++ b/src/blosc2/dict_store.py @@ -343,22 +343,37 @@ def _init_write_append_mode( self._update_map_tree() def _update_map_tree(self): - # Build map_tree from supported external leaves in working dir. + """Build map_tree from supported external leaves in working dir. + + Trust canonical external leaf suffixes on the fast path. Fall back to + metadata probing for legacy or manually renamed leaves with unusual + suffixes, preserving discovery warnings and compatibility. 
+ """ + external_exts = {".b2nd", ".b2f", ".b2b"} for root, _, files in os.walk(self.working_dir): for file in files: filepath = os.path.join(root, file) if os.path.abspath(filepath) == os.path.abspath(self.estore_path): continue rel_path = os.path.relpath(filepath, self.working_dir).replace(os.sep, "/") - if self._probe_external_leaf_path(rel_path): + if os.path.splitext(rel_path)[1] in external_exts or self._probe_external_leaf_path( + rel_path + ): self.map_tree[self._logical_key_from_relpath(rel_path)] = rel_path def _update_map_tree_from_offsets(self): - """Build map_tree from supported external leaves in a zip store.""" + """Build map_tree from supported external leaves in a zip store. + + Zip-backed stores written by DictStore/TreeStore use canonical external + leaf suffixes. Trusting those suffixes avoids opening every member just + to classify it, which is especially important for compact CTable stores + with many columns. + """ + external_exts = {".b2nd", ".b2f", ".b2b"} for filepath in self.offsets: if filepath == "embed.b2e": continue - if self._probe_external_leaf_offset(filepath): + if os.path.splitext(filepath)[1] in external_exts or self._probe_external_leaf_offset(filepath): self.map_tree[self._logical_key_from_relpath(filepath)] = filepath def _annotate_external_value( diff --git a/src/blosc2/dictionary_column.py b/src/blosc2/dictionary_column.py new file mode 100644 index 00000000..f9148d07 --- /dev/null +++ b/src/blosc2/dictionary_column.py @@ -0,0 +1,280 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +"""Dictionary-encoded string column for CTable. + +Physical layout +--------------- +A dictionary column is stored as two components: + +* **codes** — a fixed-width ``int32`` NDArray with one code per physical row + slot. 
The special code ``null_code`` (default ``-1``) marks null slots. +* **dict_store** — a variable-length string array (:class:`_ScalarVarLenArray`) + holding unique category values in first-seen order. + +An in-memory mapping ``_value_to_code: dict[str, int]`` is built lazily from +the persisted dict_store on open and kept in sync during writes. +""" + +from __future__ import annotations + +from typing import TYPE_CHECKING + +import numpy as np + +if TYPE_CHECKING: + from blosc2.scalar_array import _ScalarVarLenArray + from blosc2.schema import DictionarySpec + + +_NULL_INT32 = np.int32(-1) + + +class DictionaryColumn: + """Row-wise dictionary-encoded string column wrapping codes + dict_store. + + This class is internal; obtain instances via + ``storage.create_dictionary_column()`` or ``storage.open_dictionary_column()``. + + Parameters + ---------- + spec: + The :class:`~blosc2.schema.DictionarySpec` that describes this column. + codes: + A ``blosc2.NDArray`` of dtype ``int32`` with one slot per physical row. + dict_store: + A :class:`~blosc2.scalar_array._ScalarVarLenArray` holding unique + category strings in insertion order (no nulls). + """ + + def __init__(self, spec: DictionarySpec, codes, dict_store: _ScalarVarLenArray) -> None: + self._spec = spec + self._codes = codes # int32 NDArray (physical slot array) + self._dict_store = dict_store # _ScalarVarLenArray of vlstring (unique values) + # Cache: str → int32 code. Built lazily from dict_store on first access. 
+ self._value_to_code: dict[str, int] | None = None + + # ------------------------------------------------------------------ + # Cache management + # ------------------------------------------------------------------ + + def _ensure_cache(self) -> None: + """Build the value→code mapping from the persisted dict_store.""" + if self._value_to_code is not None: + return + self._dict_store.flush() + cache: dict[str, int] = {} + for code, value in enumerate(self._dict_store): + if value is not None: + cache[value] = code + self._value_to_code = cache + + def _invalidate_cache(self) -> None: + self._value_to_code = None + + # ------------------------------------------------------------------ + # Encoding / decoding + # ------------------------------------------------------------------ + + def encode(self, value: str | None) -> int: + """Encode *value* to an int32 code. Appends new values to the dictionary.""" + if value is None: + if not self._spec.nullable: + raise ValueError(f"Dictionary column {self._spec!r} is not nullable; received None.") + return self._spec.null_code + if not isinstance(value, str): + raise TypeError(f"Dictionary column expects str or None values, got {type(value).__name__!r}.") + self._ensure_cache() + assert self._value_to_code is not None + code = self._value_to_code.get(value) + if code is not None: + return code + # New category — append to dictionary. + new_code = len(self._value_to_code) + if new_code > np.iinfo(np.int32).max: + raise OverflowError( + "Dictionary column has exceeded the maximum number of unique values (2^31 - 1)." 
+ ) + self._dict_store.append(value) + self._value_to_code[value] = new_code + return new_code + + def decode(self, code: int) -> str | None: + """Decode an int32 *code* to its string value, or ``None`` for null codes.""" + if code == self._spec.null_code: + return None + self._ensure_cache() + return self._dict_store[int(code)] + + def encode_batch(self, values) -> np.ndarray: + """Encode a sequence of str/None to a numpy ``int32`` array of codes.""" + result = np.empty(len(values), dtype=np.int32) + for i, v in enumerate(values): + result[i] = self.encode(v) + return result + + def value_to_code(self, value: str) -> int: + """Return the code for *value*. Raises :exc:`KeyError` if absent.""" + self._ensure_cache() + assert self._value_to_code is not None + if value not in self._value_to_code: + raise KeyError(value) + return self._value_to_code[value] + + def code_to_value(self, code: int) -> str | None: + """Return the category string for *code*.""" + return self.decode(code) + + # ------------------------------------------------------------------ + # Arrow-optimised batch import + # ------------------------------------------------------------------ + + def extend_from_arrow(self, pa, arrow_col, pos: int, m: int, *, ordered: bool = False) -> None: + """Write *m* rows from an Arrow dictionary array into the codes NDArray at *pos*. + + Performs global dictionary unification: chunk-local codes are remapped + to global codes. ``ordered=True`` raises if chunk dictionary order + differs from the established global order. + """ + local_dict = arrow_col.dictionary.to_pylist() + + # Build local-code → global-code mapping. 
+ local_to_global: dict[int, int] = {} + for local_code, value in enumerate(local_dict): + if value is None: + local_to_global[local_code] = self._spec.null_code + else: + local_to_global[local_code] = self.encode(value) + + if ordered and len(local_dict) > 0: + self._validate_ordered_chunk_dict(local_dict) + + # Translate Arrow indices to global int32 codes. + indices = arrow_col.indices.to_pylist() + global_codes = np.empty(m, dtype=np.int32) + for i, idx in enumerate(indices): + if idx is None: + if not self._spec.nullable: + raise ValueError("Dictionary column is not nullable but Arrow input contains nulls.") + global_codes[i] = self._spec.null_code + else: + global_codes[i] = local_to_global[int(idx)] + + self._codes[pos : pos + m] = global_codes + + def _validate_ordered_chunk_dict(self, local_dict: list) -> None: + """Raise if *local_dict* order differs from the existing global order.""" + self._ensure_cache() + assert self._value_to_code is not None + for local_code, value in enumerate(local_dict): + if value is None: + continue + global_code = self._value_to_code.get(value) + if global_code is not None and global_code != local_code: + raise ValueError( + f"ordered=True dictionary column has inconsistent ordering across Arrow " + f"batches: value {value!r} has global code {global_code} but appears as " + f"local code {local_code} in this chunk." + ) + + # ------------------------------------------------------------------ + # Core interface: __len__, __getitem__, __setitem__ + # ------------------------------------------------------------------ + + def __len__(self) -> int: + """Return the physical slot capacity (same as the codes NDArray length).""" + return len(self._codes) + + def __getitem__(self, key) -> str | None | list: + """Return decoded value(s) for the given index. 
+ + - ``int`` → ``str | None`` + - ``slice`` → ``list`` + - ``numpy.ndarray``/``list`` → ``list`` + """ + if isinstance(key, (int, np.integer)): + return self.decode(int(self._codes[int(key)])) + if isinstance(key, slice): + codes_arr = np.asarray(self._codes[key], dtype=np.int32) + return [self.decode(int(c)) for c in codes_arr] + if isinstance(key, (list, np.ndarray)): + codes_arr = self._codes[key] + if isinstance(codes_arr, np.ndarray): + return [self.decode(int(c)) for c in codes_arr.ravel()] + return [self.decode(int(codes_arr))] + raise TypeError(f"DictionaryColumn indices must be int, slice, or array; got {type(key)!r}") + + def __setitem__(self, key, value) -> None: + """Encode *value* (str/None or list thereof) and write the code(s).""" + if isinstance(key, (int, np.integer)): + self._codes[int(key)] = np.int32(self.encode(value)) + elif isinstance(key, slice): + if isinstance(value, (list, tuple, np.ndarray)): + self._codes[key] = self.encode_batch(list(value)) + else: + # scalar broadcast + code = np.int32(self.encode(value)) + self._codes[key] = code + elif isinstance(key, (list, np.ndarray)): + self._codes[key] = self.encode_batch(list(value)) + else: + raise TypeError(f"DictionaryColumn indices must be int, slice, or array; got {type(key)!r}") + + def resize(self, shape: tuple) -> None: + """Resize the underlying codes NDArray (delegates to the NDArray).""" + self._codes.resize(shape) + + # ------------------------------------------------------------------ + # Flush / close + # ------------------------------------------------------------------ + + def flush(self) -> None: + """Flush pending dict_store batches to the backend.""" + self._dict_store.flush() + + # ------------------------------------------------------------------ + # Public properties + # ------------------------------------------------------------------ + + @property + def codes(self): + """The underlying ``int32`` NDArray of category codes.""" + return self._codes + + @property + def 
dictionary(self) -> list[str]: + """Return the list of unique dictionary values in insertion order.""" + self._dict_store.flush() + return list(self._dict_store) + + @property + def spec(self) -> DictionarySpec: + return self._spec + + @property + def dtype(self): + """Always ``None`` — dictionary columns have no fixed NumPy dtype.""" + return None + + @property + def urlpath(self) -> str | None: + return getattr(self._codes, "urlpath", None) + + @property + def nbytes(self) -> int: + return self._codes.nbytes + self._dict_store.nbytes + + @property + def cbytes(self) -> int: + return self._codes.cbytes + self._dict_store.cbytes + + @property + def cratio(self) -> float: + cb = self.cbytes + if cb == 0: + return float("inf") + return self.nbytes / cb diff --git a/src/blosc2/lazyexpr.py b/src/blosc2/lazyexpr.py index 7946aeb3..7858f7a1 100644 --- a/src/blosc2/lazyexpr.py +++ b/src/blosc2/lazyexpr.py @@ -42,6 +42,8 @@ import numpy as np import blosc2 +from blosc2 import compute_chunks_blocks +from blosc2.info import InfoReporter from .b2objects import ( encode_b2object_payload, @@ -51,13 +53,6 @@ write_b2object_user_vlmeta, ) from .dsl_kernel import DSLKernel, DSLSyntaxError, DSLValidator, specialize_miniexpr_inputs - -if blosc2._HAS_NUMBA: - import numba - -from blosc2 import compute_chunks_blocks -from blosc2.info import InfoReporter - from .proxy import convert_dtype from .utils import ( check_smaller_shape, @@ -73,6 +68,7 @@ linalg_funcs, npcumprod, npcumsum, + populate_safe_numpy_globals, process_key, reducers, safe_numpy_globals, @@ -105,6 +101,7 @@ def ne_evaluate(expression, local_dict=None, **kwargs): } if blosc2.IS_WASM: global safe_numpy_globals + populate_safe_numpy_globals(expression) if "out" in kwargs: out = kwargs.pop("out") out[:] = eval(expression, safe_numpy_globals, local_dict) @@ -221,17 +218,7 @@ def _get_result(expression, chunk_operands, ne_args, where=None, indices=None, _ blosc2_funcs = constructors + linalg_funcs + elementwise_funcs + 
reducers # functions that have to be evaluated before chunkwise lazyexpr machinery eager_funcs = linalg_funcs + reducers + ["slice"] + ["." + attr for attr in linalg_attrs] -# Gather all callable functions in numpy -numpy_funcs = { - name - for name, member in inspect.getmembers(np, callable) - if not name.startswith("_") and not isinstance(member, np.ufunc) -} -numpy_ufuncs = {name for name, member in inspect.getmembers(np, lambda x: isinstance(x, np.ufunc))} -# Add these functions to the list of available functions -# (will be evaluated via the array interface) -additional_funcs = sorted((numpy_funcs | numpy_ufuncs) - set(blosc2_funcs)) -functions = blosc2_funcs + additional_funcs +functions = blosc2_funcs _constructor_call_patterns = {name: re.compile(rf"\b{re.escape(name)}\s*\(") for name in constructors} @@ -264,21 +251,34 @@ def _find_constructor_call(expression: str, constructor: str) -> re.Match | None def get_expr_globals(expression): """Build a dictionary of functions needed for evaluating the expression.""" _globals = {"np": np, "blosc2": blosc2} - # Only check for functions that actually appear in the expression - # This avoids many unnecessary string searches + # Only check for functions that actually appear in the expression. for func in functions: if func in expression: - # Try blosc2 first if hasattr(blosc2, func): _globals[func] = getattr(blosc2, func) - # Fall back to numpy else: try: _globals[func] = safe_numpy_globals[func] - # Function not found in either module except KeyError as e: raise AttributeError(f"Function {func} not found in blosc2 or numpy") from e + # Lazily support bare numpy calls not covered by the Blosc2 function list. 
+ populate_safe_numpy_globals(expression) + try: + tree = ast.parse(expression, mode="eval") + except SyntaxError: + return _globals + for node in ast.walk(tree): + if not isinstance(node, ast.Call) or not isinstance(node.func, ast.Name): + continue + func = node.func.id + if func in _globals: + continue + if hasattr(blosc2, func): + _globals[func] = getattr(blosc2, func) + elif func in safe_numpy_globals: + _globals[func] = safe_numpy_globals[func] + return _globals @@ -4755,6 +4755,8 @@ def _reconstruct_lazyudf(expr, lazyarray, operands_dict, array): "blosc2": blosc2, } if blosc2._HAS_NUMBA: + import numba + SAFE_GLOBALS["numba"] = numba # Register the source so inspect can find it diff --git a/src/blosc2/list_array.py b/src/blosc2/list_array.py index 181da0a3..ec890de3 100644 --- a/src/blosc2/list_array.py +++ b/src/blosc2/list_array.py @@ -20,7 +20,7 @@ from blosc2.batch_array import BatchArray from blosc2.info import InfoReporter, format_nbytes_info from blosc2.objectarray import ObjectArray -from blosc2.schema import ListSpec, SchemaSpec, StructSpec +from blosc2.schema import DictionarySpec, ListSpec, SchemaSpec, StructSpec, timestamp from blosc2.schema import list as list_spec_builder _SUPPORTED_SERIALIZERS = {"msgpack", "arrow"} @@ -132,6 +132,14 @@ def _coerce_scalar_item(spec: SchemaSpec, value: Any) -> Any: # noqa: C901 if isinstance(spec, StructSpec): return _coerce_struct_item(spec, value) + if isinstance(spec, ListSpec): + return coerce_list_cell(spec, value) + if isinstance(spec, DictionarySpec): + if value is None: + raise ValueError("ListArray does not support nullable items inside a list in V1") + if not isinstance(value, str): + value = str(value) + return value if getattr(spec, "python_type", None) is str: if not isinstance(value, str): @@ -146,7 +154,12 @@ def _coerce_scalar_item(spec: SchemaSpec, value: Any) -> Any: # noqa: C901 dtype = getattr(spec, "dtype", None) if dtype is None: raise TypeError(f"Unsupported list item spec 
{type(spec).__name__!r}") - value = np.array(value, dtype=dtype).item() + if isinstance(spec, timestamp) and ( + isinstance(value, (np.datetime64, str)) or hasattr(value, "isoformat") + ): + value = np.datetime64(value).astype(f"datetime64[{spec.unit}]").astype(np.int64).item() + else: + value = np.array(value, dtype=dtype).item() ge = getattr(spec, "ge", None) if ge is not None and value < ge: @@ -380,6 +393,28 @@ def extend(self, values: Iterable[Any], *, validate: bool = True) -> None: self._pending_cells.extend(cells) self._flush_full_batches() + def extend_arrow(self, arrow_array) -> None: + """Append a PyArrow list array without materializing Python cells. + + This requires batch storage with ``serializer='arrow'`` and is intended + for trusted Arrow/Parquet import paths. + """ + pa = _require_pyarrow() + if isinstance(arrow_array, pa.ChunkedArray): + chunks = arrow_array.chunks + else: + chunks = [arrow_array] + if self.spec.storage != "batch" or self.spec.serializer != "arrow": + values = arrow_array.to_pylist() if hasattr(arrow_array, "to_pylist") else list(arrow_array) + self.extend(values, validate=False) + return + for chunk in chunks: + if len(chunk) == 0: + continue + self._backend.append(chunk) + self._persisted_row_count += len(chunk) + self._invalidate_batch_caches() + def flush(self) -> None: """Persist any pending rows when using the batch backend.""" if self.spec.storage != "batch": @@ -455,6 +490,11 @@ def _get_many_grouped(self, indices: list[int]) -> list[Any]: def _get_many(self, indices: list[int]) -> list[Any]: if self.spec.storage == "vl": return [self._backend[index] for index in indices] + # For small selections from block-addressable batches, scalar access is + # much cheaper than materializing the full containing batch. This is + # common for filtered column previews and small logical slices. 
+ if getattr(self._backend, "items_per_block", None) is not None and len(indices) <= 1024: + return [self[index] for index in indices] if len(indices) <= 1: return self._get_many_grouped(indices) monotonic = True @@ -489,7 +529,7 @@ def __getitem__(self, index: int | slice | list[int] | tuple[int, ...] | np.ndar if index >= self._persisted_row_count: return self._pending_cells[index - self._persisted_row_count] batch_index, inner_index = self._locate_persisted_row(index) - return self._get_batch_values(batch_index)[inner_index] + return self._backend[batch_index][inner_index] def __setitem__(self, index: int, value: Any) -> None: """Replace one list cell.""" @@ -568,6 +608,13 @@ def batch_rows(self) -> int | None: return self.spec.batch_rows return None + @property + def items_per_block(self) -> int | None: + """Maximum number of list cells per internal compressed block.""" + if self.spec.storage != "batch": + return None + return self._backend.items_per_block + @property def nbytes(self) -> int: """Uncompressed byte size reported by the backend.""" @@ -597,6 +644,8 @@ def info_items(self) -> list: ("backend", self.spec.storage), ("serializer", self.spec.serializer), ("rows", len(self)), + ("batch_rows", self.batch_rows), + ("items_per_block", self.items_per_block), ("pending_rows", len(self._pending_cells) if self.spec.storage == "batch" else 0), ("nbytes", format_nbytes_info(self.nbytes)), ("cbytes", format_nbytes_info(self.cbytes)), diff --git a/src/blosc2/schema.py b/src/blosc2/schema.py index c39c034e..b9d00852 100644 --- a/src/blosc2/schema.py +++ b/src/blosc2/schema.py @@ -761,6 +761,114 @@ def vlbytes( ) +# --------------------------------------------------------------------------- +# Dictionary spec +# --------------------------------------------------------------------------- + + +class DictionarySpec(SchemaSpec): + """Dictionary-encoded string column stored as int32 codes with a global string dictionary. 
+ + Each row value is a plain Python ``str`` (or ``None`` when nullable). + Internally the column stores compact integer codes (``int32``) in an NDArray, + with a separate append-only variable-length string array holding the unique + category values. This matches Arrow dictionary encoding semantics. + + Parameters + ---------- + index_type: + Must be :class:`int32`. The physical dtype for category codes. + value_type: + Must be :class:`VLStringSpec`. The type of dictionary values. + ordered: + If ``True``, the dictionary has semantic ordering. Ordered comparisons + (``<``, ``>``) are not implemented in v1 but the flag is stored and + exported to Arrow. + nullable: + If ``True`` (default), null row slots are allowed. Nulls are represented + internally by the reserved code ``null_code`` (default ``-1``). + null_code: + The reserved code value for null slots. Default is ``-1``. + """ + + python_type = str + dtype = None # physical codes are int32, but logical type is str + + def __init__( + self, + *, + index_type=None, + value_type=None, + ordered: _builtin_bool = False, + nullable: _builtin_bool = True, + null_code: int = -1, + ): + from blosc2.schema import int32 as _int32 + + if index_type is not None and not isinstance(index_type, _int32): + raise TypeError( + f"DictionarySpec index_type must be blosc2.int32() in v1; got {type(index_type).__name__!r}" + ) + if value_type is not None and not isinstance(value_type, VLStringSpec): + raise TypeError( + "DictionarySpec value_type must be blosc2.vlstring() in v1; " + f"got {type(value_type).__name__!r}" + ) + self.index_type = index_type if index_type is not None else _int32() + self.value_type = value_type if value_type is not None else VLStringSpec() + self.ordered = _builtin_bool(ordered) + self.nullable = _builtin_bool(nullable) + self.null_code = int(null_code) + + def to_pydantic_kwargs(self) -> dict[str, Any]: + return {} + + def to_metadata_dict(self) -> dict[str, Any]: + return { + "kind": "dictionary", + 
"index_type": self.index_type.to_metadata_dict(), + "value_type": self.value_type.to_metadata_dict(), + "ordered": self.ordered, + "nullable": self.nullable, + "null_code": self.null_code, + } + + +def dictionary( + *, + index_type=None, + value_type=None, + ordered: bool = False, + nullable: bool = True, +) -> DictionarySpec: + """Build a dictionary-encoded string column descriptor. + + Dictionary columns store repeated string values as compact ``int32`` codes + with a separate global dictionary of unique string values. This matches + Arrow dictionary encoding and is ideal for low-cardinality string columns + such as categories or enumerated values. + + Parameters + ---------- + index_type: + The physical type for category codes. Must be ``blosc2.int32()`` in v1. + Defaults to ``blosc2.int32()`` when not specified. + value_type: + The type of dictionary values. Must be ``blosc2.vlstring()`` in v1. + Defaults to ``blosc2.vlstring()`` when not specified. + ordered: + If ``True``, dictionary order is semantically meaningful. + nullable: + If ``True`` (default), null row values are allowed (stored as code ``-1``). + """ + return DictionarySpec( + index_type=index_type, + value_type=value_type, + ordered=ordered, + nullable=nullable, + ) + + def struct(fields: dict[str, SchemaSpec], *, nullable: bool = False) -> StructSpec: """Build a structured schema descriptor for dict-like CTable values. 
diff --git a/src/blosc2/schema_compiler.py b/src/blosc2/schema_compiler.py index a6cfe04f..006f9e22 100644 --- a/src/blosc2/schema_compiler.py +++ b/src/blosc2/schema_compiler.py @@ -22,6 +22,7 @@ from blosc2.schema import ( BLOSC2_FIELD_METADATA_KEY, + DictionarySpec, ListSpec, ObjectSpec, SchemaSpec, @@ -76,6 +77,8 @@ "vlbytes": VLBytesSpec, "object": ObjectSpec, "timestamp": timestamp, + # dictionary + "dictionary": DictionarySpec, } # --------------------------------------------------------------------------- @@ -102,6 +105,8 @@ def compute_display_width(spec: SchemaSpec) -> int: """Return a reasonable terminal display width for *spec*'s column.""" + if isinstance(spec, DictionarySpec): + return 32 if isinstance(spec, (VLStringSpec, VLBytesSpec, ObjectSpec)): return 40 if isinstance(spec, (ListSpec, StructSpec)): @@ -211,7 +216,8 @@ def validate_annotation_matches_spec(name: str, annotation: Any, spec: SchemaSpe origin = typing.get_origin(annotation) if origin not in (list, list): raise TypeError( - f"Column {name!r}: annotation {annotation!r} is incompatible with list spec; expected list[T]." + f"Column {name!r}: annotation {annotation!r} is incompatible with list spec; " + "expected list[T]." ) args = typing.get_args(annotation) if len(args) != 1: @@ -225,6 +231,14 @@ def validate_annotation_matches_spec(name: str, annotation: Any, spec: SchemaSpe ) return + if isinstance(spec, DictionarySpec): + if annotation is not str: + raise TypeError( + f"Column {name!r}: annotation {annotation!r} is incompatible with " + f"DictionarySpec (expected str)." 
+ ) + return + if isinstance(spec, timestamp): if annotation in (object, np.datetime64, datetime.datetime, str, int): return @@ -259,15 +273,15 @@ def _validate_column_name(name: str) -> None: * must be a non-empty string * must not start with ``_`` (reserved for internal table layout) - * must not contain ``/`` (used as path separator in persistent layout) * must not be one of the reserved internal names + + Literal ``/`` characters are allowed in logical names; persistent CTable + storage percent-encodes path segments before writing under ``_cols``. """ if not name: raise ValueError("Column name cannot be empty.") if name.startswith("_"): raise ValueError(f"Column name cannot start with '_' (reserved for internal use): {name!r}") - if "/" in name: - raise ValueError(f"Column name cannot contain '/': {name!r}") if name in _RESERVED_COLUMN_NAMES: raise ValueError(f"Column name {name!r} is reserved for internal CTable use.") @@ -404,6 +418,10 @@ def spec_from_metadata_dict(data: dict[str, Any]) -> SchemaSpec: return ListSpec(item_spec, **data) if kind == "struct": return StructSpec.from_metadata_dict({"fields": data.pop("fields"), **data}) + if kind == "dictionary": + index_type = spec_from_metadata_dict(data.pop("index_type")) + value_type = spec_from_metadata_dict(data.pop("value_type")) + return DictionarySpec(index_type=index_type, value_type=value_type, **data) spec_cls = _KIND_TO_SPEC.get(kind) if spec_cls is None: raise ValueError(f"Unknown column kind {kind!r}") @@ -447,8 +465,9 @@ def schema_to_dict(schema: CompiledSchema) -> dict[str, Any]: entry["blocks"] = list(col.config.blocks) cols.append(entry) + schema_version = 2 if schema.metadata.get("nested") is not None else 1 result = { - "version": 1, + "version": schema_version, "row_cls": schema.row_cls.__name__ if schema.row_cls is not None else None, "columns": cols, } @@ -470,7 +489,7 @@ def schema_from_dict(data: dict[str, Any]) -> CompiledSchema: If *data* uses an unknown schema version or an unknown 
column kind. """ version = data.get("version", 1) - if version != 1: + if version not in (1, 2): raise ValueError(f"Unsupported schema version {version!r}") columns: list[CompiledColumn] = [] diff --git a/src/blosc2/schunk.py b/src/blosc2/schunk.py index 35182bea..e765dcdc 100644 --- a/src/blosc2/schunk.py +++ b/src/blosc2/schunk.py @@ -1742,12 +1742,9 @@ def _open_treestore_root_object(store, urlpath, mode): if manifest["kind"] == "ctable": if mode not in {"r", "a"}: return store - # Discard the probe store without repacking — it was only opened - # to peek at the manifest. A full close() would trigger to_b2z() - # even though nothing was modified, and CTable.open() below will - # create its own store anyway. - store.discard() - return blosc2.CTable.open(urlpath, mode=mode) + # Reuse the TreeStore that was opened to inspect the root manifest. + # This avoids a second TreeStore open when dispatching root CTables. + return blosc2.CTable._open_from_existing_filestore(urlpath, mode=mode, store=store) return store diff --git a/src/blosc2/utils.py b/src/blosc2/utils.py index 4e02341c..6483982f 100644 --- a/src/blosc2/utils.py +++ b/src/blosc2/utils.py @@ -7,6 +7,7 @@ import ast import builtins +import contextlib import inspect import math import sys @@ -88,41 +89,6 @@ def format_expr_scalar(value): return value -global safe_numpy_globals -# Use numpy eval when running in WebAssembly -safe_numpy_globals = {"np": np} -# Add all first-level numpy functions -safe_numpy_globals.update( - {name: getattr(np, name) for name in dir(np) if callable(getattr(np, name)) and not name.startswith("_")} -) - -if not NUMPY_GE_2_0: # handle non-array-api compliance - safe_numpy_globals["acos"] = np.arccos - safe_numpy_globals["acosh"] = np.arccosh - safe_numpy_globals["asin"] = np.arcsin - safe_numpy_globals["asinh"] = np.arcsinh - safe_numpy_globals["atan"] = np.arctan - safe_numpy_globals["atanh"] = np.arctanh - safe_numpy_globals["atan2"] = np.arctan2 - 
safe_numpy_globals["permute_dims"] = np.transpose - safe_numpy_globals["pow"] = np.power - safe_numpy_globals["bitwise_left_shift"] = np.left_shift - safe_numpy_globals["bitwise_right_shift"] = np.right_shift - safe_numpy_globals["bitwise_invert"] = np.bitwise_not - safe_numpy_globals["concat"] = np.concatenate - safe_numpy_globals["matrix_transpose"] = np.transpose - safe_numpy_globals["vecdot"] = npvecdot - safe_numpy_globals["cumulative_sum"] = npcumsum - safe_numpy_globals["cumulative_prod"] = npcumprod - -# handle different naming conventions between numpy and blosc2 -safe_numpy_globals["contains"] = _string_contains -safe_numpy_globals["startswith"] = _string_startswith -safe_numpy_globals["endswith"] = _string_endswith -safe_numpy_globals["upper"] = _string_upper -safe_numpy_globals["lower"] = _string_lower - - elementwise_funcs = [ "abs", "acos", @@ -267,6 +233,66 @@ def format_expr_scalar(value): constructors += ["reshape"] +_NUMPY_ALIASES = { + "acos": np.arccos, + "acosh": np.arccosh, + "asin": np.arcsin, + "asinh": np.arcsinh, + "atan": np.arctan, + "atanh": np.arctanh, + "atan2": np.arctan2, + "concat": getattr(np, "concat", np.concatenate), + "contains": _string_contains, + "cumulative_prod": npcumprod, + "cumulative_sum": npcumsum, + "endswith": _string_endswith, + "lower": _string_lower, + "matrix_transpose": getattr(np, "matrix_transpose", np.transpose), + "permute_dims": nptranspose, + "pow": np.power, + "startswith": _string_startswith, + "upper": _string_upper, + "vecdot": npvecdot, +} +if not NUMPY_GE_2_0: # handle non-array-api compliance + _NUMPY_ALIASES.update( + { + "bitwise_invert": np.bitwise_not, + "bitwise_left_shift": np.left_shift, + "bitwise_right_shift": np.right_shift, + } + ) + +# Use numpy eval when running in WebAssembly. Keep this intentionally small: +# scanning every callable in numpy triggers lazy imports such as numpy.f2py and +# numpy.testing during ``import blosc2``. 
+safe_numpy_globals = {"np": np, **_NUMPY_ALIASES} +for _name in set(elementwise_funcs + linalg_funcs + reducers + constructors): + if _name not in safe_numpy_globals and not _name.startswith("_"): + with contextlib.suppress(AttributeError): + _value = getattr(np, _name) + if callable(_value): + safe_numpy_globals[_name] = _value + + +def populate_safe_numpy_globals(expression: str) -> None: + """Add bare numpy call names used by *expression* to safe_numpy_globals.""" + try: + tree = ast.parse(expression, mode="eval") + except SyntaxError: + return + for node in ast.walk(tree): + if not isinstance(node, ast.Call) or not isinstance(node.func, ast.Name): + continue + name = node.func.id + if name in safe_numpy_globals or name.startswith("_"): + continue + with contextlib.suppress(AttributeError): + value = getattr(np, name) + if callable(value): + safe_numpy_globals[name] = value + + # --- Shape utilities --- def linalg_shape(func_name, args, kwargs): # noqa: C901 # --- Linear algebra and tensor manipulation --- diff --git a/tests/ctable/test_arrow_interop.py b/tests/ctable/test_arrow_interop.py index f3f28d9c..8bc1bde9 100644 --- a/tests/ctable/test_arrow_interop.py +++ b/tests/ctable/test_arrow_interop.py @@ -7,6 +7,7 @@ """Tests for CTable.to_arrow() and CTable.from_arrow().""" +import datetime from dataclasses import dataclass import numpy as np @@ -310,6 +311,34 @@ def test_from_arrow_list_struct_nullable_values_roundtrip(): assert t[2].nutriments == [{"name": "energy", "value": 42.0}] +def test_from_arrow_list_struct_timestamp_roundtrip(): + event_type = pa.struct( + [ + pa.field("when", pa.timestamp("ms")), + pa.field("value", pa.float64()), + ] + ) + at = pa.table( + { + "events": pa.array( + [ + [{"when": datetime.datetime(2020, 1, 1), "value": 1.5}], + None, + ], + type=pa.list_(event_type), + ) + } + ) + + t = CTable.from_arrow(at.schema, at.to_batches()) + assert t[0].events == [{"when": 1577836800000, "value": 1.5}] + assert t[1].events is None + + out = 
t.to_arrow() + assert out.schema.field("events").type == pa.list_(event_type) + assert out.column("events").to_pylist()[0][0]["when"].isoformat() == "2020-01-01T00:00:00" + + def test_from_arrow_unsupported_type_raises(): at = pa.table({"duration": pa.array([1, 2, 3], type=pa.duration("s"))}) with pytest.raises(TypeError, match="No blosc2 spec"): diff --git a/tests/ctable/test_column.py b/tests/ctable/test_column.py index 41f2cfa1..63b06947 100644 --- a/tests/ctable/test_column.py +++ b/tests/ctable/test_column.py @@ -26,6 +26,12 @@ class StrRow: label: str = blosc2.field(blosc2.string(max_length=16)) +@dataclass +class DictRow: + vendor: str = blosc2.field(blosc2.dictionary()) + fare: float = blosc2.field(blosc2.float64()) + + DATA20 = [(i, float(i * 10), True) for i in range(20)] @@ -50,6 +56,49 @@ def test_column_metadata(): assert tabla.score._mask is None +def test_column_float32_repr_uses_numpy_formatting(): + """Column/table repr uses compact NumPy-style formatting for float32 previews.""" + + @dataclass + class Float32Row: + value: float = blosc2.field(blosc2.float32()) + + tabla = CTable(Float32Row, new_data=[(222.22,), (210.8,)]) + col_text = repr(tabla.value) + table_text = str(tabla) + + assert "222.22" in col_text + assert "222.22000122070312" not in col_text + assert "222.22" in table_text + assert "222.22000122070312" not in table_text + + +def test_column_info(): + """Column.info reports logical and physical storage details.""" + tabla = CTable(Row, new_data=DATA20) + info = tabla.score.info + text = repr(info) + + assert len(info) == len(tabla.score.info_items) + assert ("type", "Column") in tabla.score.info_items + assert ("name", "score") in tabla.score.info_items + assert "logical_length" in text + assert "physical_length" in text + assert "logical_shape" not in text + assert "table_physical_length" not in text + assert "storage" in text + + +def test_dictionary_column_info(): + """Dictionary Column.info reports dictionary-specific details 
without code-shape duplication.""" + tabla = CTable(DictRow, new_data=[("Uber", 10.5), ("Lyft", 7.2), ("Uber", 15.0)]) + text = repr(tabla.vendor.info) + + assert "dictionary_size" in text + assert "dictionary[str]" in text + assert "codes_shape" not in text + + def test_column_getitem_no_holes(): """int, slice, and list indexing on a full table.""" tabla = CTable(Row, new_data=DATA20) diff --git a/tests/ctable/test_ctable_indexing.py b/tests/ctable/test_ctable_indexing.py index 672c3942..000d5ed0 100644 --- a/tests/ctable/test_ctable_indexing.py +++ b/tests/ctable/test_ctable_indexing.py @@ -306,6 +306,7 @@ def test_catalog_survives_reopen(tmpdir): assert not idxs[0].stale +@pytest.mark.heavy def test_where_with_index_matches_scan_persistent(tmpdir): path = str(tmpdir / "table.b2d") t = _make_table(200, persistent_path=path) @@ -320,6 +321,7 @@ def test_where_with_index_matches_scan_persistent(tmpdir): assert ids_idx == ids_scan +@pytest.mark.heavy def test_persistent_index_drop_releases_sidecars_without_gc(tmpdir): import gc diff --git a/tests/ctable/test_dictionary_column.py b/tests/ctable/test_dictionary_column.py new file mode 100644 index 00000000..13dfbb8e --- /dev/null +++ b/tests/ctable/test_dictionary_column.py @@ -0,0 +1,485 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. 
+# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### +"""Tests for the CTable dictionary column type.""" + +from __future__ import annotations + +from dataclasses import dataclass + +import pytest + +import blosc2 +from blosc2 import CTable, DictionarySpec +from blosc2.dictionary_column import DictionaryColumn +from blosc2.schema_compiler import compile_schema, schema_from_dict, schema_to_dict + +pa = pytest.importorskip("pyarrow") +pq = pytest.importorskip("pyarrow.parquet") + + +# --------------------------------------------------------------------------- +# Unit tests: DictionarySpec and schema compiler +# --------------------------------------------------------------------------- + + +class TestDictionarySpec: + def test_default_construction(self): + spec = blosc2.dictionary() + assert spec.ordered is False + assert spec.nullable is True + assert spec.null_code == -1 + + def test_wrong_index_type_raises(self): + with pytest.raises(TypeError, match="int32"): + blosc2.dictionary(index_type=blosc2.int64()) + + def test_wrong_value_type_raises(self): + with pytest.raises(TypeError, match="vlstring"): + blosc2.dictionary(value_type=blosc2.string(max_length=32)) + + def test_metadata_roundtrip(self): + spec = blosc2.dictionary(ordered=True, nullable=False) + d = spec.to_metadata_dict() + assert d["kind"] == "dictionary" + assert d["ordered"] is True + assert d["nullable"] is False + assert d["null_code"] == -1 + + def test_schema_serialization_roundtrip(self): + @dataclass + class Row: + vendor: str = blosc2.field(blosc2.dictionary()) + fare: float = blosc2.field(blosc2.float64()) + + schema = compile_schema(Row) + d = schema_to_dict(schema) + schema2 = schema_from_dict(d) + col = schema2.columns_by_name["vendor"] + assert isinstance(col.spec, DictionarySpec) + assert col.spec.ordered is False + assert col.spec.nullable is True + + def test_dataclass_annotation_must_be_str(self): + from 
blosc2.schema_compiler import validate_annotation_matches_spec + + spec = blosc2.dictionary() + with pytest.raises(TypeError, match="str"): + validate_annotation_matches_spec("x", int, spec) + + def test_dataclass_annotation_str_ok(self): + from blosc2.schema_compiler import validate_annotation_matches_spec + + spec = blosc2.dictionary() + validate_annotation_matches_spec("x", str, spec) # should not raise + + +# --------------------------------------------------------------------------- +# CTable behavior tests +# --------------------------------------------------------------------------- + + +@dataclass +class TripRow: + vendor: str = blosc2.field(blosc2.dictionary()) + fare: float = blosc2.field(blosc2.float64()) + + +DATA = [ + {"vendor": "Uber", "fare": 10.5}, + {"vendor": "Lyft", "fare": 7.2}, + {"vendor": "Uber", "fare": 15.0}, + {"vendor": "Via", "fare": 5.0}, +] + +# Tuple form for extend() +DATA_TUPLES = [ + ("Uber", 10.5), + ("Lyft", 7.2), + ("Uber", 15.0), + ("Via", 5.0), +] + + +def _logical_mask_values(ct, mask): + """Materialize a physical predicate as logical/live-row values.""" + arr = mask.compute() if isinstance(mask, blosc2.LazyExpr) else mask + arr = arr[:] if isinstance(arr, blosc2.NDArray) else arr + return arr[ct._valid_rows[:]].tolist() + + +class TestCTableBehavior: + def test_append_and_read(self): + ct = CTable(TripRow) + for row in DATA: + ct.append(row) + assert ct.nrows == 4 + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + assert ct["vendor"][0] == "Uber" + assert ct["vendor"][1] == "Lyft" + + def test_repeated_strings_reuse_codes(self): + ct = CTable(TripRow) + for row in DATA: + ct.append(row) + codes = ct._cols["vendor"].codes[:4].tolist() + assert codes[0] == codes[2] # "Uber" appears twice with same code + assert len(ct._cols["vendor"].dictionary) == 3 # Uber, Lyft, Via + + def test_null_slot(self): + ct = CTable(TripRow) + ct.append({"vendor": None, "fare": 0.0}) + assert ct["vendor"][0] is None + assert 
ct._cols["vendor"].codes[0] == -1 + + def test_nullable_false_rejects_null(self): + @dataclass + class NNRow: + vendor: str = blosc2.field(blosc2.dictionary(nullable=False)) + fare: float = blosc2.field(blosc2.float64()) + + ct = CTable(NNRow) + with pytest.raises((ValueError, TypeError)): + ct.append({"vendor": None, "fare": 0.0}) + + def test_invalid_type_raises(self): + ct = CTable(TripRow) + with pytest.raises((TypeError, ValueError)): + ct.append({"vendor": 42, "fare": 0.0}) + + def test_extend_batch(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + assert ct.nrows == 4 + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + + def test_codes_and_dictionary_properties(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + dc = ct._cols["vendor"] + assert isinstance(dc, DictionaryColumn) + assert list(dc.dictionary) == ["Uber", "Lyft", "Via"] + codes = dc.codes[:4].tolist() + assert codes == [0, 1, 0, 2] + + def test_equality_filter(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"] == "Uber" + assert _logical_mask_values(ct, mask) == [True, False, True, False] + + def test_equality_absent_value_returns_false(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"] == "Waymo" + assert _logical_mask_values(ct, mask) == [False, False, False, False] + + def test_equality_none(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + ct.append({"vendor": None, "fare": 0.0}) + mask = ct["vendor"] == None # noqa: E711 + assert _logical_mask_values(ct, mask) == [False, False, False, False, True] + + def test_dictionary_predicate_combines_with_regular_predicate_in_aggregate(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + assert ct["fare"].sum(where=(ct["fare"] > 6) & (ct["vendor"] == "Uber")) == pytest.approx(25.5) + + def test_isin(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"].isin(["Uber", "Via"]) + assert mask.tolist() == [True, False, True, True] + + def 
test_isin_absent_values(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"].isin(["Waymo"]) + assert all(not v for v in mask.tolist()) + + def test_is_null(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + ct.append({"vendor": None, "fare": 0.0}) + assert _logical_mask_values(ct, ct["vendor"].is_null()) == [False, False, False, False, True] + + def test_null_count(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + ct.append({"vendor": None, "fare": 0.0}) + assert ct["vendor"].null_count() == 1 + + def test_is_dictionary_property(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + assert ct["vendor"].is_dictionary is True + assert ct["fare"].is_dictionary is False + + +# --------------------------------------------------------------------------- +# Persistence tests +# --------------------------------------------------------------------------- + + +class TestPersistence: + def test_b2d_roundtrip(self, tmp_path): + p = str(tmp_path / "trips.b2d") + ct = CTable(TripRow, urlpath=p, mode="w") + ct.extend(DATA_TUPLES) + ct.close() + + ct2 = CTable.open(p, mode="r") + assert ct2.nrows == 4 + assert ct2["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + assert ct2._cols["vendor"].dictionary == ["Uber", "Lyft", "Via"] + ct2.close() + + def test_b2z_roundtrip(self, tmp_path): + p = str(tmp_path / "trips.b2z") + ct = CTable(TripRow, urlpath=p, mode="w") + ct.extend(DATA_TUPLES) + ct.close() + + ct2 = CTable.open(p, mode="r") + assert ct2.nrows == 4 + assert ct2["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + ct2.close() + + +# --------------------------------------------------------------------------- +# Arrow import / export tests +# --------------------------------------------------------------------------- + + +class TestArrowInterop: + def _make_arrow_table(self, index_type=None, value_type=None, values=None, ordered=False): + if index_type is None: + index_type = pa.int32() + if value_type is None: + value_type = pa.string() + 
if values is None: + values = ["Uber", "Lyft", "Uber", None] + return pa.table( + { + "vendor": pa.array(values, type=pa.dictionary(index_type, value_type, ordered=ordered)), + "fare": pa.array([10.5, 7.2, 15.0, 0.0], type=pa.float64()), + } + ) + + def test_import_dict_int32(self): + at = self._make_arrow_table(index_type=pa.int32()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_dict_int8(self): + at = self._make_arrow_table(index_type=pa.int8()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_dict_int16(self): + at = self._make_arrow_table(index_type=pa.int16()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_dict_int64(self): + at = self._make_arrow_table(index_type=pa.int64()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_dict_uint8(self): + at = self._make_arrow_table(index_type=pa.uint8()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_dict_uint32(self): + at = self._make_arrow_table(index_type=pa.uint32()) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", None] + + def test_import_nulls_preserved(self): + at = self._make_arrow_table(values=["A", None, "B", None]) + ct = CTable.from_arrow(at.schema, at.to_batches()) + assert ct["vendor"][:] == ["A", None, "B", None] + assert ct._cols["vendor"].codes[:4].tolist() == [0, -1, 1, -1] + + def test_export_produces_dict_type(self): + at = self._make_arrow_table() + ct = CTable.from_arrow(at.schema, at.to_batches()) + (batch,) = ct.iter_arrow_batches() + field = batch.schema.field("vendor") + assert pa.types.is_dictionary(field.type) + assert 
field.type.index_type == pa.int32() + assert field.type.value_type == pa.string() + + def test_export_values_match(self): + at = self._make_arrow_table() + ct = CTable.from_arrow(at.schema, at.to_batches()) + (batch,) = ct.iter_arrow_batches() + assert batch.column("vendor").to_pylist() == ["Uber", "Lyft", "Uber", None] + + def test_parquet_roundtrip(self, tmp_path): + path = tmp_path / "test.parquet" + at = self._make_arrow_table(values=["Uber", "Lyft", "Uber", "Via"]) + pq.write_table(at, path) + ct = CTable.from_parquet(path) + assert isinstance(ct._schema.columns_by_name["vendor"].spec, DictionarySpec) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + + path2 = tmp_path / "roundtrip.parquet" + ct.to_parquet(path2) + at2 = pq.read_table(path2) + assert pa.types.is_dictionary(at2.schema.field("vendor").type) + assert at2.column("vendor").to_pylist() == ["Uber", "Lyft", "Uber", "Via"] + + def test_chunked_dict_unification(self): + """Two batches with different chunk-local dictionaries → global unification.""" + batch1 = pa.record_batch( + {"vendor": pa.array(["Uber", "Lyft"], type=pa.dictionary(pa.int32(), pa.string()))}, + schema=pa.schema([pa.field("vendor", pa.dictionary(pa.int32(), pa.string()))]), + ) + batch2 = pa.record_batch( + {"vendor": pa.array(["Via", "Uber"], type=pa.dictionary(pa.int32(), pa.string()))}, + schema=pa.schema([pa.field("vendor", pa.dictionary(pa.int32(), pa.string()))]), + ) + schema = pa.schema([pa.field("vendor", pa.dictionary(pa.int32(), pa.string()))]) + ct = CTable.from_arrow(schema, [batch1, batch2]) + assert ct["vendor"][:] == ["Uber", "Lyft", "Via", "Uber"] + codes = ct._cols["vendor"].codes[:4].tolist() + # Uber should have the same code in both positions + assert codes[0] == codes[3] + + def test_ordered_dict_inconsistent_order_raises(self): + schema = pa.schema([pa.field("x", pa.dictionary(pa.int32(), pa.string(), ordered=True))]) + batch1 = pa.record_batch( + {"x": pa.array(["A", "B"], 
type=pa.dictionary(pa.int32(), pa.string(), ordered=True))}, + schema=schema, + ) + # Batch2 has different order for existing values + batch2 = pa.record_batch( + {"x": pa.array(["B", "A"], type=pa.dictionary(pa.int32(), pa.string(), ordered=True))}, + schema=schema, + ) + with pytest.raises(ValueError, match="ordered"): + CTable.from_arrow(schema, [batch1, batch2]) + + def test_unsupported_dict_value_type_raises(self): + schema = pa.schema([pa.field("x", pa.dictionary(pa.int32(), pa.int64()))]) + at = pa.table({"x": pa.array([1, 2], type=pa.dictionary(pa.int32(), pa.int64()))}) + with pytest.raises(TypeError, match="dictionary"): + CTable.from_arrow(schema, at.to_batches()) + + +# --------------------------------------------------------------------------- +# Index tests +# --------------------------------------------------------------------------- + + +class TestIndex: + def test_create_index(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + idx = ct.create_index("vendor") + assert idx is not None + + def test_index_metadata_is_logical(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + ct.create_index("vendor") + catalog = ct._storage.load_index_catalog() + assert "vendor" in catalog + + def test_equality_uses_codes(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"] == "Uber" + assert _logical_mask_values(ct, mask) == [True, False, True, False] + + def test_isin_uses_codes(self): + ct = CTable(TripRow) + ct.extend(DATA_TUPLES) + mask = ct["vendor"].isin(["Lyft", "Via"]) + assert mask.tolist() == [False, True, False, True] + + def test_append_after_index(self, tmp_path): + p = str(tmp_path / "indexed.b2d") + ct = CTable(TripRow, urlpath=p, mode="w") + ct.extend(DATA_TUPLES) + ct.create_index("vendor") + ct.append({"vendor": "Uber", "fare": 20.0}) + assert ct.nrows == 5 + mask = ct["vendor"] == "Uber" + assert mask.sum() == 3 + ct.close() + + +# --------------------------------------------------------------------------- +# CLI 
tests +# --------------------------------------------------------------------------- + + +def test_cli_preserves_dict_by_default(tmp_path): + from blosc2.cli.parquet_to_blosc2 import main + + path = tmp_path / "dict.parquet" + out = tmp_path / "dict.b2d" + at = pa.table( + {"vendor": pa.array(["Uber", "Lyft", "Uber", "Via"], type=pa.dictionary(pa.int32(), pa.string()))} + ) + pq.write_table(at, path) + + assert main([str(path), str(out)]) == 0 + + ct = CTable.open(str(out), mode="r") + assert isinstance(ct._schema.columns_by_name["vendor"].spec, DictionarySpec) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber", "Via"] + ct.close() + + +def test_cli_decode_dictionaries_flag(tmp_path): + from blosc2.cli.parquet_to_blosc2 import main + from blosc2.schema import VLStringSpec + + path = tmp_path / "dict.parquet" + out = tmp_path / "dict_decoded.b2d" + at = pa.table( + {"vendor": pa.array(["Uber", "Lyft", "Uber"], type=pa.dictionary(pa.int32(), pa.string()))} + ) + pq.write_table(at, path) + + assert main(["--decode-dictionaries", str(path), str(out)]) == 0 + + ct = CTable.open(str(out), mode="r") + assert isinstance(ct._schema.columns_by_name["vendor"].spec, VLStringSpec) + assert ct["vendor"][:] == ["Uber", "Lyft", "Uber"] + ct.close() + + +def test_cli_dict_export_roundtrip(tmp_path): + from blosc2.cli.parquet_to_blosc2 import main + + path = tmp_path / "dict.parquet" + out = tmp_path / "dict.b2d" + exported = tmp_path / "dict_exported.parquet" + + at = pa.table( + { + "vendor": pa.array(["Uber", "Lyft", None, "Via"], type=pa.dictionary(pa.int32(), pa.string())), + "score": pa.array([1, 2, 3, 4], type=pa.int32()), + } + ) + pq.write_table(at, path) + + assert main([str(path), str(out)]) == 0 + assert main(["--export", str(out), str(exported)]) == 0 + + rt = pq.read_table(exported) + assert rt.column("vendor").to_pylist() == ["Uber", "Lyft", None, "Via"] + assert rt.column("score").to_pylist() == [1, 2, 3, 4] + + +if __name__ == "__main__": + pytest.main(["-v", 
__file__]) diff --git a/tests/ctable/test_getitem_access.py b/tests/ctable/test_getitem_access.py new file mode 100644 index 00000000..d0f25ad6 --- /dev/null +++ b/tests/ctable/test_getitem_access.py @@ -0,0 +1,145 @@ +####################################################################### +# Copyright (c) 2019-present, Blosc Development Team +# All rights reserved. +# +# SPDX-License-Identifier: BSD-3-Clause +####################################################################### + +from dataclasses import dataclass + +import numpy as np +import pytest + +import blosc2 +from blosc2 import CTable +from blosc2.ctable import Column + + +@dataclass +class AccessRow: + id: int = blosc2.field(blosc2.int64()) + score: float = blosc2.field(blosc2.float64()) + active: bool = blosc2.field(blosc2.bool()) + note: str = blosc2.field(blosc2.vlstring(nullable=True)) + tags: list[int] = blosc2.field(blosc2.list(blosc2.int64(), nullable=True)) # noqa: RUF009 + + +DATA = [ + (0, 1.5, True, "zero", [0, 1]), + (1, 2.5, False, None, None), + (2, 3.5, True, "two", [2]), + (3, 4.5, False, "three", [3, 4]), +] + + +def test_getitem_string_column(): + t = CTable(AccessRow, new_data=DATA) + col = t["id"] + assert isinstance(col, Column) + assert list(col) == [0, 1, 2, 3] + + +def test_getitem_int_returns_namedtuple_row(): + t = CTable(AccessRow, new_data=DATA) + row = t[1] + assert row.id == 1 + assert row.score == 2.5 + assert row.active is False + assert row.note is None + assert row.tags is None + assert row["id"] == 1 + assert row[0] == 1 + assert row.as_dict()["score"] == 2.5 + + +def test_getitem_int_negative_and_bounds(): + t = CTable(AccessRow, new_data=DATA) + assert t[-1].id == 3 + with pytest.raises(IndexError): + _ = t[len(DATA)] + + +def test_getitem_slice_returns_view(): + t = CTable(AccessRow, new_data=DATA) + sub = t[1:3] + assert isinstance(sub, CTable) + assert list(sub.id) == [1, 2] + assert sub.base is t + + +def test_getitem_integer_list_and_bool_mask_return_views(): + 
t = CTable(AccessRow, new_data=DATA) + gathered = t[[3, 0, 2]] + assert isinstance(gathered, CTable) + assert set(gathered.id) == {0, 2, 3} + + mask = np.array([True, False, True, False]) + filtered = t[mask] + assert isinstance(filtered, CTable) + assert list(filtered.id) == [0, 2] + + +def test_getitem_list_of_strings_projects_columns(): + t = CTable(AccessRow, new_data=DATA) + sub = t[["id", "note"]] + assert isinstance(sub, CTable) + assert sub.col_names == ["id", "note"] + assert list(sub.id) == [0, 1, 2, 3] + assert list(sub.note) == ["zero", None, "two", "three"] + + +def test_getitem_string_expression_filters_rows(): + t = CTable(AccessRow, new_data=DATA) + sub = t["id >= 2"] + assert isinstance(sub, CTable) + assert list(sub.id) == [2, 3] + + +def test_where_columns_projects_after_filter(): + t = CTable(AccessRow, new_data=DATA) + sub = t.where("id >= 1", columns=["id", "note"]) + assert sub.col_names == ["id", "note"] + assert list(sub.id) == [1, 2, 3] + assert list(sub.note) == [None, "two", "three"] + + +def test_getitem_invalid_key_type_raises(): + t = CTable(AccessRow, new_data=DATA) + with pytest.raises(TypeError): + _ = t[1.5] + with pytest.raises(TypeError): + _ = t[(1, 2)] + + +def test_getitem_projection_unknown_column_raises(): + t = CTable(AccessRow, new_data=DATA) + with pytest.raises(KeyError): + _ = t[["id", "missing"]] + + +def test_getitem_non_boolean_expression_raises(): + t = CTable(AccessRow, new_data=DATA) + with pytest.raises(TypeError): + _ = t["id + 1"] + + +def test_ctable_array_materialization_uses_structured_dtype(): + t = CTable(AccessRow, new_data=DATA) + arr = np.asarray(t) + assert arr.dtype.fields is not None + assert arr.dtype["id"] == np.dtype(np.int64) + assert arr.dtype["score"] == np.dtype(np.float64) + assert arr.dtype["active"] == np.dtype(np.bool_) + assert arr.dtype["note"] == np.dtype(object) + assert arr.dtype["tags"] == np.dtype(object) + assert arr[1]["id"] == 1 + assert arr[1]["note"] is None + assert 
arr[2]["tags"] == [2] + + +def test_ctable_view_array_materialization(): + t = CTable(AccessRow, new_data=DATA) + arr = np.asarray(t[1:3]) + assert arr.shape == (2,) + assert arr[0]["id"] == 1 + assert arr[1]["note"] == "two" diff --git a/tests/ctable/test_nested_access_storage.py b/tests/ctable/test_nested_access_storage.py new file mode 100644 index 00000000..6eaa70b7 --- /dev/null +++ b/tests/ctable/test_nested_access_storage.py @@ -0,0 +1,165 @@ +from dataclasses import dataclass + +import pytest + +import blosc2 + +try: + import pyarrow as pa + import pyarrow.parquet as pq +except ImportError: # pragma: no cover - optional dependency + pa = None + pq = None + +pytestmark = pytest.mark.skipif(pa is None, reason="pyarrow is required for nested Arrow/Parquet tests") + + +@dataclass +class AccessRow: + trip_begin_lon: float + payment_fare: float + + +@dataclass +class PersistRow: + a: int + + +def test_dotted_column_attribute_namespace_and_where_string(): + t = blosc2.CTable(AccessRow) + t.append((1.0, 10.0)) + t.append((2.0, 30.0)) + t.append((3.0, 40.0)) + + t.rename_column("trip_begin_lon", "trip.begin.lon") + t.rename_column("payment_fare", "payment.fare") + + assert t["trip.begin.lon"].sum() == 6.0 + assert t.trip.begin.lon.max() == 3.0 + + view1 = t.where("payment.fare > 20") + assert view1.nrows == 2 + + view2 = t.where(t.payment.fare > 20) + assert view2.nrows == 2 + + +def test_dotted_column_persists_under_hierarchical_cols(tmp_path): + t = blosc2.CTable(PersistRow) + t.append((1,)) + t.rename_column("a", "trip.begin.lon") + + path = tmp_path / "nested.b2d" + t.save(str(path), overwrite=True) + + leaf = path / "_cols" / "trip" / "begin" / "lon.b2nd" + assert leaf.exists() + + opened = blosc2.CTable.open(str(path)) + assert opened["trip.begin.lon"][0] == 1 + + +def test_select_struct_prefix_expands_descendants(): + t = blosc2.CTable(AccessRow) + t.append((1.0, 10.0)) + t.rename_column("trip_begin_lon", "trip.begin.lon") + t.rename_column("payment_fare", 
"payment.fare") + + s = t.select(["trip"]) + assert s.col_names == ["trip.begin.lon"] + + +def test_from_arrow_flattens_struct_columns_to_dotted_leaves(): + trip_type = pa.struct([("begin", pa.struct([("lon", pa.float64()), ("lat", pa.float64())]))]) + schema = pa.schema([pa.field("trip", trip_type)]) + batch = pa.record_batch( + [ + pa.array( + [ + {"begin": {"lon": 1.1, "lat": 2.2}}, + {"begin": {"lon": 3.3, "lat": 4.4}}, + ], + type=trip_type, + ) + ], + schema=schema, + ) + + t = blosc2.CTable.from_arrow(schema, [batch]) + assert "trip.begin.lon" in t.col_names + assert "trip.begin.lat" in t.col_names + assert t["trip.begin.lon"][1] == 3.3 + + row0 = t[0] + assert isinstance(row0.trip, dict) + assert row0.trip["begin"]["lon"] == 1.1 + assert row0.trip["begin"]["lat"] == 2.2 + + +def test_nested_field_name_escaping_for_literal_dot_and_slash(tmp_path): + trip_type = pa.struct([pa.field("begin/point", pa.struct([pa.field("lon.deg", pa.float64())]))]) + schema = pa.schema([pa.field("trip.info", trip_type)]) + batch = pa.record_batch( + [ + pa.array( + [ + {"begin/point": {"lon.deg": 1.0}}, + {"begin/point": {"lon.deg": 2.0}}, + ], + type=trip_type, + ) + ], + schema=schema, + ) + + path = tmp_path / "escaped.b2d" + t = blosc2.CTable.from_arrow(schema, [batch], urlpath=str(path)) + + leaf_name = r"trip\.info.begin\/point.lon\.deg" + assert t.col_names == [leaf_name] + assert t[leaf_name][1] == 2.0 + assert t[r"trip\.info"][0] == {"begin/point": {"lon.deg": 1.0}} + assert t.where(r"trip\.info.begin\/point.lon\.deg > 1.5").nrows == 1 + + leaf_path = path / "_cols" / "trip%2Einfo" / "begin%2Fpoint" / "lon%2Edeg.b2nd" + assert leaf_path.exists() + + opened = blosc2.CTable.open(str(path)) + assert opened.col_names == [leaf_name] + assert opened[leaf_name][1] == 2.0 + + out = t.to_arrow() + assert out.schema.names == ["trip.info"] + assert out.column("trip.info").to_pylist()[1]["begin/point"]["lon.deg"] == 2.0 + + +def test_nested_struct_parquet_roundtrip(tmp_path): + 
trip_type = pa.struct([("begin", pa.struct([("lon", pa.float64()), ("lat", pa.float64())]))]) + schema = pa.schema([pa.field("trip", trip_type)]) + table = pa.table( + { + "trip": pa.array( + [ + {"begin": {"lon": 1.1, "lat": 2.2}}, + {"begin": {"lon": 3.3, "lat": 4.4}}, + {"begin": {"lon": 5.5, "lat": 6.6}}, + ], + type=trip_type, + ) + }, + schema=schema, + ) + + src = tmp_path / "src.parquet" + pq.write_table(table, src) + + t = blosc2.CTable.from_parquet(src) + assert t.col_names == ["trip.begin.lon", "trip.begin.lat"] + assert t[2].trip["begin"]["lon"] == 5.5 + + dst = tmp_path / "dst.parquet" + t.to_parquet(dst) + out = pq.read_table(dst) + assert out.num_rows == 3 + assert out.schema.names == ["trip"] + assert out.column("trip").to_pylist()[0]["begin"]["lon"] == 1.1 diff --git a/tests/ctable/test_nested_metadata_root.py b/tests/ctable/test_nested_metadata_root.py new file mode 100644 index 00000000..a8c61364 --- /dev/null +++ b/tests/ctable/test_nested_metadata_root.py @@ -0,0 +1,93 @@ +import pytest + +import blosc2 +from blosc2.schema_compiler import schema_from_dict, schema_to_dict + +try: + import pyarrow as pa +except ImportError: # pragma: no cover - optional dependency + pa = None + +pytestmark = pytest.mark.skipif(pa is None, reason="pyarrow is required for nested Arrow/Parquet tests") + + +def _table_with_empty_root_alias(): + md = {b"blosc2_empty_root_physical": b"root"} + schema = pa.schema([pa.field("root", pa.float64())]).with_metadata(md) + batch = pa.record_batch([pa.array([1.0, 2.0, 3.0])], schema=schema) + return blosc2.CTable.from_arrow(schema, [batch]) + + +def test_schema_version_2_with_nested_metadata_roundtrip(): + schema = pa.schema([pa.field("x.y", pa.float64())]) + batch = pa.record_batch([pa.array([1.0, 2.0])], schema=schema) + t = blosc2.CTable.from_arrow(schema, [batch]) + + d = schema_to_dict(t._schema) + assert d["version"] == 2 + assert "nested" in d["metadata"] + + restored = schema_from_dict(d) + assert 
restored.metadata["nested"]["physical_to_storage"]["x.y"] == "_cols/x/y" + + +def test_empty_root_metadata_exports_back_to_empty_arrow_name(): + t = _table_with_empty_root_alias() + out = t.to_arrow() + assert out.schema.names == [""] + + +def test_empty_root_logical_alias_getitem_select_and_index(): + t = _table_with_empty_root_alias() + assert t[""][0] == 1.0 + s = t.select([""]) + assert s.col_names == ["root"] + + ix = t.create_index(col_name="") + assert ix is not None + + # index management should accept logical alias too + t.rebuild_index(col_name="") + t.drop_index(col_name="") + + +def test_sort_by_nested_prefix_requires_leaf_column(): + schema = pa.schema([pa.field("trip.begin.lon", pa.float64()), pa.field("trip.begin.lat", pa.float64())]) + batch = pa.record_batch([pa.array([2.0, 1.0]), pa.array([20.0, 10.0])], schema=schema) + t = blosc2.CTable.from_arrow(schema, [batch]) + + with pytest.raises(ValueError): + t.sort_by("trip") + + s = t.sort_by("trip.begin.lon") + assert s["trip.begin.lon"][0] == 1.0 + + +@pytest.mark.heavy +def test_nested_ops_compat_matrix_smoke(): + n = 20_000 + lon = pa.array([float(i % 1000) for i in range(n)], type=pa.float64()) + lat = pa.array([float((i * 2) % 1000) for i in range(n)], type=pa.float64()) + fare = pa.array([float(i % 50) for i in range(n)], type=pa.float64()) + schema = pa.schema( + [ + pa.field("trip.begin.lon", pa.float64()), + pa.field("trip.begin.lat", pa.float64()), + pa.field("payment.fare", pa.float64()), + ] + ) + batch = pa.record_batch([lon, lat, fare], schema=schema) + + t = blosc2.CTable.from_arrow(schema, [batch]) + + view = t.where("payment.fare > 25") + assert 0 < view.nrows < n + + t.create_index(col_name="payment.fare") + t.rebuild_index(col_name="payment.fare") + + sorted_t = t.sort_by("trip.begin.lon") + assert sorted_t["trip.begin.lon"][0] <= sorted_t["trip.begin.lon"][1] + + proj = t.select(["trip"]) + assert proj.col_names == ["trip.begin.lon", "trip.begin.lat"] diff --git 
a/tests/ctable/test_parquet_interop.py b/tests/ctable/test_parquet_interop.py index 64f73f56..193eddf6 100644 --- a/tests/ctable/test_parquet_interop.py +++ b/tests/ctable/test_parquet_interop.py @@ -8,6 +8,7 @@ """Tests for CTable.to_parquet(), from_parquet(), iter_arrow_batches(), and from_arrow().""" +import io from dataclasses import dataclass import numpy as np @@ -662,6 +663,29 @@ def test_invalid_batch_size_from_parquet(self, tmp_path): with pytest.raises(ValueError, match="batch_size"): CTable.from_parquet(path, batch_size=0) + def test_invalid_max_rows_from_parquet(self, tmp_path): + t = CTable(Row, new_data=DATA10) + path = tmp_path / "x.parquet" + t.to_parquet(path) + with pytest.raises(ValueError, match="max_rows"): + CTable.from_parquet(path, max_rows=-1) + + def test_max_rows_from_parquet_limits_rows(self, tmp_path): + t = CTable(Row, new_data=DATA10) + path = tmp_path / "x.parquet" + t.to_parquet(path) + out = CTable.from_parquet(path, batch_size=4, max_rows=6) + assert len(out) == 6 + np.testing.assert_array_equal(out["id"][:], np.arange(6)) + + def test_max_rows_zero_from_parquet_imports_empty_table(self, tmp_path): + t = CTable(Row, new_data=DATA10) + path = tmp_path / "x.parquet" + t.to_parquet(path) + out = CTable.from_parquet(path, max_rows=0) + assert len(out) == 0 + assert out.col_names == ["id", "score", "active", "label"] + def test_string_truncation_error(self, tmp_path): """Importing longer strings than max_length raises ValueError.""" at = pa.table({"name": pa.array(["a" * 300, "b"], type=pa.string())}) @@ -672,6 +696,104 @@ def test_string_truncation_error(self, tmp_path): CTable.from_parquet(path, string_max_length=10) +def test_parquet_cli_progress_is_opt_in(tmp_path, capsys): + from blosc2.cli.parquet_to_blosc2 import main + + path = tmp_path / "progress.parquet" + out = tmp_path / "progress.b2d" + pq.write_table(pa.table({"x": pa.array([1, 2, 3], type=pa.int64())}), path) + + assert main(["--parquet-batch-size", "1", str(path), 
str(out)]) == 0 + captured = capsys.readouterr() + assert " batch" not in captured.out + + out_progress = tmp_path / "progress_enabled.b2d" + assert main(["--progress", "--parquet-batch-size", "1", str(path), str(out_progress)]) == 0 + captured = capsys.readouterr() + assert " batch" in captured.out + + +def test_parquet_cli_nested_progress_skips_write_lines(tmp_path, capsys): + from blosc2.cli.parquet_to_blosc2 import main + + buf, _ = _make_taxi_parquet_buf(n_outer_rows=3) + path = tmp_path / "taxi.parquet" + out = tmp_path / "taxi.b2d" + path.write_bytes(buf.getvalue()) + + assert ( + main( + [ + "--progress", + "--parquet-batch-size", + "1", + "--blosc2-batch-size", + "1", + str(path), + str(out), + ] + ) + == 0 + ) + captured = capsys.readouterr() + assert " parquet batch" in captured.out + assert " write" not in captured.out + + +def test_parquet_cli_separate_nested_flattens_top_level_structs(tmp_path, capsys): + from blosc2.cli.parquet_to_blosc2 import main + + trip_type = pa.struct( + [ + pa.field("sec", pa.float32()), + pa.field("begin", pa.struct([pa.field("lon", pa.float64()), pa.field("lat", pa.float64())])), + ] + ) + path = tmp_path / "struct.parquet" + out = tmp_path / "struct.b2d" + table = pa.table( + { + "trip": pa.array( + [ + {"sec": 10.0, "begin": {"lon": -87.6, "lat": 41.8}}, + {"sec": 20.0, "begin": {"lon": -87.7, "lat": 41.9}}, + ], + type=trip_type, + ), + "fare": pa.array([15.0, 25.0], type=pa.float32()), + } + ) + pq.write_table(table, path) + + assert main([str(path), str(out)]) == 0 + captured = capsys.readouterr() + assert "Struct→columns: 1" in captured.out + + ct = CTable.open(str(out), mode="r") + assert ct.col_names == ["trip.sec", "trip.begin.lon", "trip.begin.lat", "fare"] + np.testing.assert_allclose(ct["trip.begin.lon"][:], [-87.6, -87.7]) + ct.close() + + +def test_parquet_cli_no_separate_nested_preserves_top_level_struct_as_list(tmp_path): + from blosc2.cli.parquet_to_blosc2 import main + + trip_type = 
pa.struct([pa.field("sec", pa.float32())]) + path = tmp_path / "struct.parquet" + out = tmp_path / "struct.b2d" + pq.write_table( + pa.table({"trip": pa.array([{"sec": 10.0}, {"sec": 20.0}], type=trip_type)}), + path, + ) + + assert main(["--no-separate-nested-cols", str(path), str(out)]) == 0 + + ct = CTable.open(str(out), mode="r") + assert ct.col_names == ["trip"] + assert ct["trip"][:] == [[{"sec": 10.0}], [{"sec": 20.0}]] + ct.close() + + def test_parquet_cli_timestamp_unit_auto(tmp_path): from blosc2.cli.parquet_to_blosc2 import main @@ -696,5 +818,495 @@ def test_parquet_cli_timestamp_unit_auto(tmp_path): assert table._cols["ts"][:].tolist() == [1735689600, 1735689601, 1735689602] +# --------------------------------------------------------------------------- +# separate_nested_cols / unnamed-root list> import +# --------------------------------------------------------------------------- + +# --------------------------------------------------------------------------- +# Shared schema / data helpers +# --------------------------------------------------------------------------- + + +def _make_taxi_schema(): + """Return a simplified taxi-like Arrow schema (inner struct fields).""" + trip_type = pa.struct( + [ + pa.field("sec", pa.float32()), + pa.field( + "begin", + pa.struct([pa.field("lon", pa.float64()), pa.field("lat", pa.float64())]), + ), + ] + ) + payment_type = pa.struct( + [ + pa.field("fare", pa.float64()), + pa.field("tips", pa.float64()), + ] + ) + return pa.struct( + [ + pa.field("trip", trip_type), + pa.field("payment", payment_type), + pa.field("company", pa.string()), + ] + ) + + +def _make_taxi_parquet_buf(n_outer_rows=2): + """Create an in-memory Parquet buffer with an unnamed root list>. + + *n_outer_rows* controls how many Parquet rows (outer lists) to create. + Each outer list contains 1–3 trip records. 
+ """ + root_struct = _make_taxi_schema() + root_list = pa.list_(root_struct) + + all_rows = [ + [ + { + "trip": {"sec": 10.0, "begin": {"lon": -87.6, "lat": 41.8}}, + "payment": {"fare": 15.0, "tips": 2.0}, + "company": "Taxi Corp", + }, + { + "trip": {"sec": 20.0, "begin": {"lon": -87.7, "lat": 41.9}}, + "payment": {"fare": 25.0, "tips": 3.0}, + "company": "Blue Cab", + }, + ], + [ + { + "trip": {"sec": 5.0, "begin": {"lon": -87.5, "lat": 41.7}}, + "payment": {"fare": 10.0, "tips": 1.0}, + "company": "Taxi Corp", + }, + ], + [ + { + "trip": {"sec": 30.0, "begin": {"lon": -87.3, "lat": 41.6}}, + "payment": {"fare": 5.0, "tips": 0.5}, + "company": "City Cab", + }, + { + "trip": {"sec": 15.0, "begin": {"lon": -87.4, "lat": 41.5}}, + "payment": {"fare": 12.0, "tips": 1.5}, + "company": "Blue Cab", + }, + { + "trip": {"sec": 8.0, "begin": {"lon": -87.2, "lat": 41.4}}, + "payment": {"fare": 9.0, "tips": 0.0}, + "company": "Taxi Corp", + }, + ], + ] + rows = all_rows[:n_outer_rows] + arr = pa.array(rows, type=root_list) + buf = io.BytesIO() + pq.write_table(pa.table({"": arr}), buf) + buf.seek(0) + return buf, rows + + +def _count_elements(rows): + """Count the total number of list elements across outer rows.""" + return sum(len(r) for r in rows) + + +# --------------------------------------------------------------------------- +# Detection helper tests +# --------------------------------------------------------------------------- + + +class TestDetectUnnamedRootListStruct: + def test_detects_single_unnamed_list_struct(self): + root_struct = _make_taxi_schema() + schema = pa.schema([pa.field("", pa.list_(root_struct))]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is True + + def test_detects_large_list_variant(self): + root_struct = _make_taxi_schema() + schema = pa.schema([pa.field("", pa.large_list(root_struct))]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is True + + def test_rejects_named_field(self): + root_struct = 
_make_taxi_schema() + schema = pa.schema([pa.field("events", pa.list_(root_struct))]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is False + + def test_rejects_multiple_fields(self): + root_struct = _make_taxi_schema() + schema = pa.schema([pa.field("", pa.list_(root_struct)), pa.field("id", pa.int64())]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is False + + def test_rejects_non_list_unnamed_field(self): + root_struct = _make_taxi_schema() + schema = pa.schema([pa.field("", root_struct)]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is False + + def test_rejects_list_of_scalar(self): + schema = pa.schema([pa.field("", pa.list_(pa.int64()))]) + assert CTable._detect_unnamed_root_list_struct(pa, schema) is False + + +# --------------------------------------------------------------------------- +# Phase 1 acceptance tests +# --------------------------------------------------------------------------- + + +class TestUnnamedRootImport: + """Acceptance tests for Phase 1: unnamed-root list> import.""" + + def _make_ct(self, n_outer_rows=2, **kwargs): + buf, rows = _make_taxi_parquet_buf(n_outer_rows) + ct = CTable.from_parquet(buf, separate_nested_cols=True, **kwargs) + return ct, rows + + # ------------------------------------------------------------------ + # Row count + # ------------------------------------------------------------------ + + def test_nrows_equals_element_count_2_outer(self): + ct, rows = self._make_ct(n_outer_rows=2) + assert len(ct) == _count_elements(rows) # 3 + + def test_from_parquet_separates_nested_cols_by_default(self): + buf, rows = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf) + assert len(ct) == _count_elements(rows) + assert "column_0" not in ct.col_names + assert "trip.begin.lon" in ct.col_names + + def test_nrows_equals_element_count_3_outer(self): + ct, rows = self._make_ct(n_outer_rows=3) + assert len(ct) == _count_elements(rows) # 6 + + def 
test_max_rows_limits_flattened_element_rows(self): + ct, rows = self._make_ct(n_outer_rows=3, max_rows=4, batch_size=1) + expected = [r["payment"]["fare"] for outer in rows for r in outer][:4] + assert len(ct) == 4 + np.testing.assert_allclose(ct["payment.fare"][:].tolist(), expected) + assert ct._schema.metadata["nested"]["original_root"]["kind"] == "unnamed_list_struct" + + def test_max_rows_zero_imports_empty_flattened_table(self): + ct, _ = self._make_ct(n_outer_rows=3, max_rows=0) + assert len(ct) == 0 + assert "column_0" not in ct.col_names + assert "trip.begin.lon" in ct.col_names + assert ct._schema.metadata["nested"]["original_root"]["kind"] == "unnamed_list_struct" + + # ------------------------------------------------------------------ + # Column names — no column_0, no unnamed root in col_names + # ------------------------------------------------------------------ + + def test_col_names_no_column_0(self): + ct, _ = self._make_ct() + assert "column_0" not in ct.col_names + assert "" not in ct.col_names + + def test_col_names_contains_leaf_paths(self): + ct, _ = self._make_ct() + expected = { + "trip.sec", + "trip.begin.lon", + "trip.begin.lat", + "payment.fare", + "payment.tips", + "company", + } + assert set(ct.col_names) == expected + + # ------------------------------------------------------------------ + # Column access and analytics + # ------------------------------------------------------------------ + + def test_payment_fare_mean(self): + ct, rows = self._make_ct(n_outer_rows=2) + fares = [r["payment"]["fare"] for outer in rows for r in outer] + expected = np.mean(fares) + np.testing.assert_allclose(ct["payment.fare"].mean(), expected) + + def test_trip_begin_lon_mean(self): + ct, rows = self._make_ct(n_outer_rows=2) + lons = [r["trip"]["begin"]["lon"] for outer in rows for r in outer] + expected = np.mean(lons) + np.testing.assert_allclose(ct["trip.begin.lon"].mean(), expected) + + def test_payment_fare_values(self): + ct, rows = 
self._make_ct(n_outer_rows=2) + expected = [r["payment"]["fare"] for outer in rows for r in outer] + np.testing.assert_allclose(ct["payment.fare"][:].tolist(), expected) + + def test_company_column_values(self): + ct, rows = self._make_ct(n_outer_rows=2) + expected = [r["company"] for outer in rows for r in outer] + assert list(ct["company"][:]) == expected + + # ------------------------------------------------------------------ + # where() filtering + # ------------------------------------------------------------------ + + def test_where_payment_fare_gt_12(self): + ct, rows = self._make_ct(n_outer_rows=2) + all_fares = [r["payment"]["fare"] for outer in rows for r in outer] + expected_count = sum(1 for f in all_fares if f > 12) + result = ct.where("payment.fare > 12") + assert len(result) == expected_count + + def test_where_payment_fare_gt_20(self): + ct, rows = self._make_ct(n_outer_rows=2) + all_fares = [r["payment"]["fare"] for outer in rows for r in outer] + expected_count = sum(1 for f in all_fares if f > 20) + result = ct.where("payment.fare > 20") + assert len(result) == expected_count + + # ------------------------------------------------------------------ + # Provenance metadata + # ------------------------------------------------------------------ + + def test_original_root_metadata_present(self): + ct, _ = self._make_ct() + nested = ct._schema.metadata.get("nested", {}) + assert "original_root" in nested + + def test_original_root_metadata_kind(self): + ct, _ = self._make_ct() + orig = ct._schema.metadata["nested"]["original_root"] + assert orig["kind"] == "unnamed_list_struct" + + def test_original_root_metadata_field_name(self): + ct, _ = self._make_ct() + orig = ct._schema.metadata["nested"]["original_root"] + assert orig["field_name"] == "" + + def test_original_root_metadata_preserve_grouping_false(self): + ct, _ = self._make_ct() + orig = ct._schema.metadata["nested"]["original_root"] + assert orig["preserve_grouping"] is False + + # 
------------------------------------------------------------------ + # Persistence: .b2d reopen + # ------------------------------------------------------------------ + + def test_b2d_reopen_nrows(self, tmp_path): + buf, rows = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=True, urlpath=str(tmp_path / "taxi.b2d")) + ct.close() + ct2 = CTable.open(str(tmp_path / "taxi.b2d"), mode="r") + assert len(ct2) == _count_elements(rows) + ct2.close() + + def test_b2d_reopen_col_names(self, tmp_path): + buf, _ = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=True, urlpath=str(tmp_path / "taxi.b2d")) + col_names = ct.col_names + ct.close() + ct2 = CTable.open(str(tmp_path / "taxi.b2d"), mode="r") + assert ct2.col_names == col_names + ct2.close() + + def test_b2d_reopen_values(self, tmp_path): + buf, rows = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=True, urlpath=str(tmp_path / "taxi.b2d")) + expected_fares = [r["payment"]["fare"] for outer in rows for r in outer] + ct.close() + ct2 = CTable.open(str(tmp_path / "taxi.b2d"), mode="r") + np.testing.assert_allclose(ct2["payment.fare"][:].tolist(), expected_fares) + ct2.close() + + def test_b2d_reopen_original_root_metadata(self, tmp_path): + buf, _ = _make_taxi_parquet_buf() + ct = CTable.from_parquet(buf, separate_nested_cols=True, urlpath=str(tmp_path / "taxi.b2d")) + ct.close() + ct2 = CTable.open(str(tmp_path / "taxi.b2d"), mode="r") + orig = ct2._schema.metadata["nested"]["original_root"] + assert orig["kind"] == "unnamed_list_struct" + ct2.close() + + def test_b2z_reopen(self, tmp_path): + buf, rows = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=True, urlpath=str(tmp_path / "taxi.b2z")) + ct.close() + ct2 = CTable.open(str(tmp_path / "taxi.b2z"), mode="r") + assert len(ct2) == _count_elements(rows) + assert "trip.begin.lon" in ct2.col_names + 
ct2.close() + + # ------------------------------------------------------------------ + # to_arrow() emits clean logical nested table + # ------------------------------------------------------------------ + + def test_to_arrow_no_unnamed_column(self): + ct, _ = self._make_ct() + arrow_table = ct.to_arrow() + assert "" not in arrow_table.schema.names + assert "column_0" not in arrow_table.schema.names + + def test_to_arrow_has_trip_and_payment_top_level(self): + ct, _ = self._make_ct() + arrow_table = ct.to_arrow() + names = arrow_table.schema.names + assert "trip" in names + assert "payment" in names + assert "company" in names + + def test_to_arrow_trip_is_struct(self): + ct, _ = self._make_ct() + arrow_table = ct.to_arrow() + assert pa.types.is_struct(arrow_table.schema.field("trip").type) + + def test_to_arrow_payment_fare_values(self): + ct, rows = self._make_ct(n_outer_rows=2) + arrow_table = ct.to_arrow() + expected = [r["payment"]["fare"] for outer in rows for r in outer] + payment_col = arrow_table.column("payment") + actual = [row.as_py()["fare"] for row in payment_col] + np.testing.assert_allclose(actual, expected) + + # ------------------------------------------------------------------ + # from_arrow with separate_nested_cols=True + # ------------------------------------------------------------------ + + def test_from_arrow_separate_nested_cols(self): + """from_arrow accepts separate_nested_cols=True directly.""" + root_struct = _make_taxi_schema() + root_list = pa.list_(root_struct) + data = [ + [ + { + "trip": {"sec": 10.0, "begin": {"lon": -87.6, "lat": 41.8}}, + "payment": {"fare": 15.0, "tips": 2.0}, + "company": "Taxi", + }, + { + "trip": {"sec": 5.0, "begin": {"lon": -87.5, "lat": 41.7}}, + "payment": {"fare": 10.0, "tips": 1.0}, + "company": "Taxi", + }, + ] + ] + arr = pa.array(data, type=root_list) + schema = pa.schema([pa.field("", root_list)]) + batch = pa.record_batch([arr], schema=schema) + ct = CTable.from_arrow(schema, [batch], 
separate_nested_cols=True) + assert len(ct) == 2 + assert "trip.begin.lon" in ct.col_names + assert "payment.fare" in ct.col_names + np.testing.assert_allclose(ct["payment.fare"][:].tolist(), [15.0, 10.0]) + + # ------------------------------------------------------------------ + # Behaviour when separate_nested_cols=False (existing behaviour) + # ------------------------------------------------------------------ + + def test_false_flag_gives_renamed_root_column(self): + """Without separate_nested_cols, the old renaming behaviour applies.""" + buf, _ = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=False) + # The unnamed "" field should be renamed to "root" + assert "root" in ct.col_names + + def test_false_flag_nrows_equals_parquet_rows(self): + """Without separate_nested_cols, nrows is the number of Parquet outer rows.""" + buf, rows = _make_taxi_parquet_buf(n_outer_rows=2) + ct = CTable.from_parquet(buf, separate_nested_cols=False) + # 2 Parquet rows, not 3 elements + assert len(ct) == len(rows) + + # ------------------------------------------------------------------ + # Edge cases + # ------------------------------------------------------------------ + + def test_empty_outer_list(self): + """Importing a Parquet file where all outer lists are empty gives 0 rows.""" + root_struct = _make_taxi_schema() + root_list = pa.list_(root_struct) + arr = pa.array([[], []], type=root_list) + buf = io.BytesIO() + pq.write_table(pa.table({"": arr}), buf) + buf.seek(0) + ct = CTable.from_parquet(buf, separate_nested_cols=True) + assert len(ct) == 0 + assert set(ct.col_names) == { + "trip.sec", + "trip.begin.lon", + "trip.begin.lat", + "payment.fare", + "payment.tips", + "company", + } + + def test_single_element(self): + """A single-element list imports as one CTable row.""" + root_struct = _make_taxi_schema() + root_list = pa.list_(root_struct) + arr = pa.array( + [ + [ + { + "trip": {"sec": 7.0, "begin": {"lon": -87.0, "lat": 
41.0}}, + "payment": {"fare": 8.0, "tips": 0.5}, + "company": "X", + } + ] + ], + type=root_list, + ) + buf = io.BytesIO() + pq.write_table(pa.table({"": arr}), buf) + buf.seek(0) + ct = CTable.from_parquet(buf, separate_nested_cols=True) + assert len(ct) == 1 + assert ct["payment.fare"][0] == 8.0 + + def test_non_qualifying_schema_ignored_with_flag(self): + """separate_nested_cols=True is silently ignored for a normal (non-qualifying) schema.""" + at = pa.table({"x": pa.array([1, 2, 3], type=pa.int64()), "y": pa.array([4.0, 5.0, 6.0])}) + buf = io.BytesIO() + pq.write_table(at, buf) + buf.seek(0) + ct = CTable.from_parquet(buf, separate_nested_cols=True) + assert len(ct) == 3 + assert ct.col_names == ["x", "y"] + + def test_multiple_batches(self): + """separate_nested_cols works when Parquet is read in small batches.""" + buf, rows = _make_taxi_parquet_buf(n_outer_rows=3) + ct = CTable.from_parquet(buf, separate_nested_cols=True, batch_size=1) + assert len(ct) == _count_elements(rows) + fares = [r["payment"]["fare"] for outer in rows for r in outer] + np.testing.assert_allclose(ct["payment.fare"][:].tolist(), fares) + + def test_nested_list_inside_element_ignored_at_phase1(self): + """A nested list inside the element struct is imported as a ListArray column (phase 1).""" + path_type = pa.struct([pa.field("londiff", pa.float32()), pa.field("latdiff", pa.float32())]) + trip_with_path = pa.struct( + [ + pa.field("sec", pa.float32()), + pa.field("path", pa.list_(path_type)), + ] + ) + root_struct = pa.struct([pa.field("trip", trip_with_path), pa.field("fare", pa.float64())]) + root_list = pa.list_(root_struct) + data = [ + [ + {"trip": {"sec": 10.0, "path": [{"londiff": 0.1, "latdiff": 0.2}]}, "fare": 15.0}, + {"trip": {"sec": 5.0, "path": []}, "fare": 8.0}, + ] + ] + arr = pa.array(data, type=root_list) + buf = io.BytesIO() + pq.write_table(pa.table({"": arr}), buf) + buf.seek(0) + ct = CTable.from_parquet(buf, separate_nested_cols=True) + assert len(ct) == 2 + 
assert "fare" in ct.col_names + assert ct["fare"][:].tolist() == [15.0, 8.0] + # trip.path should be a ListArray column with one list per element row + assert "trip.path" in ct.col_names + assert ct["trip.path"].is_list + assert ct["trip.path"][0] == [{"londiff": pytest.approx(0.1), "latdiff": pytest.approx(0.2)}] + assert ct["trip.path"][1] == [] + + if __name__ == "__main__": pytest.main(["-v", __file__]) diff --git a/tests/ctable/test_table_persistency.py b/tests/ctable/test_table_persistency.py index 3873318c..2f10d493 100644 --- a/tests/ctable/test_table_persistency.py +++ b/tests/ctable/test_table_persistency.py @@ -459,15 +459,10 @@ class Bad: CTable(Bad) -def test_column_name_cannot_contain_slash(): - @dataclass - class Bad: - pass - +def test_column_name_can_contain_slash(): from blosc2.schema_compiler import _validate_column_name - with pytest.raises(ValueError, match="/"): - _validate_column_name("a/b") + _validate_column_name("a/b") def test_column_name_cannot_be_empty(): diff --git a/tests/ctable/test_varlen_columns.py b/tests/ctable/test_varlen_columns.py index 11d643a2..4a527d6e 100644 --- a/tests/ctable/test_varlen_columns.py +++ b/tests/ctable/test_varlen_columns.py @@ -38,6 +38,16 @@ def test_ctable_varlen_append_extend_and_reads(): assert t.tags[2] == ["r", "s"] +def test_list_column_display(): + t = blosc2.CTable(Product, new_data=DATA) + text = str(t) + col_text = repr(t.tags) + + assert "['x', 'y']" in text + assert "" in col_text + assert "['x', 'y']" not in col_text + + def test_ctable_varlen_where_select_head_tail_and_compact(): t = blosc2.CTable(Product, new_data=DATA) view = t.where(t.qty >= 2) diff --git a/tests/ndarray/test_elementwise_funcs.py b/tests/ndarray/test_elementwise_funcs.py index 85d6edca..c82705f2 100644 --- a/tests/ndarray/test_elementwise_funcs.py +++ b/tests/ndarray/test_elementwise_funcs.py @@ -314,6 +314,7 @@ def test_unary_funcs(np_func, blosc_func, dtype, shape, chunkshape): _test_unary_func_impl(np_func, blosc_func, 
dtype, shape, chunkshape) +@pytest.mark.heavy @pytest.mark.parametrize(("np_func", "blosc_func"), UNARY_FUNC_PAIRS) @pytest.mark.parametrize("dtype", STR_DTYPES) @pytest.mark.parametrize("shape", [(10,), (20, 20)]) @@ -338,6 +339,7 @@ def test_binary_funcs(np_func, blosc_func, dtype, shape, chunkshape): _test_binary_func_impl(np_func, blosc_func, dtype, shape, chunkshape) +@pytest.mark.heavy @pytest.mark.parametrize(("np_func", "blosc_func"), BINARY_FUNC_PAIRS) @pytest.mark.parametrize("dtype", STR_DTYPES) @pytest.mark.parametrize(("shape", "chunkshape"), SHAPES_CHUNKS) diff --git a/tests/ndarray/test_lazyexpr.py b/tests/ndarray/test_lazyexpr.py index 6525a07d..7b33cdff 100644 --- a/tests/ndarray/test_lazyexpr.py +++ b/tests/ndarray/test_lazyexpr.py @@ -27,6 +27,37 @@ NITEMS_SMALL = 100 NITEMS = 1000 +_UNARY_FUNCTIONS = [ + "sin", + "cos", + "sqrt", + "tan", + "arctan", + "exp", + "log", + "conj", + "real", + "imag", + pytest.param("sinh", marks=pytest.mark.heavy), + pytest.param("cosh", marks=pytest.mark.heavy), + pytest.param("tanh", marks=pytest.mark.heavy), + pytest.param("arcsin", marks=pytest.mark.heavy), + pytest.param("arccos", marks=pytest.mark.heavy), + pytest.param("arcsinh", marks=pytest.mark.heavy), + pytest.param("arccosh", marks=pytest.mark.heavy), + pytest.param("arctanh", marks=pytest.mark.heavy), + pytest.param("expm1", marks=pytest.mark.heavy), + pytest.param("log10", marks=pytest.mark.heavy), + pytest.param("log1p", marks=pytest.mark.heavy), +] + +_LAZYEXPR_OPERAND_MIXES = [ + ("NDArray", "numpy"), + ("NDArray", "NDArray"), + pytest.param(("numpy", "NDArray"), marks=pytest.mark.heavy), + pytest.param(("numpy", "numpy"), marks=pytest.mark.heavy), +] + @pytest.fixture(params=[np.float32, np.float64]) def dtype_fixture(request): @@ -357,32 +388,7 @@ def test_comparison_operators(dtype_fixture, compare_expressions, comparison_ope # Skip this test for blosc2.IS_WASM @pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") 
-@pytest.mark.parametrize( - "function", - [ - "sin", - "cos", - "tan", - "sqrt", - "sinh", - "cosh", - "tanh", - "arcsin", - "arccos", - "arctan", - "arcsinh", - "arccosh", - "arctanh", - "exp", - "expm1", - "log", - "log10", - "log1p", - "conj", - "real", - "imag", - ], -) +@pytest.mark.parametrize("function", _UNARY_FUNCTIONS) def test_functions(function, dtype_fixture, shape_fixture): nelems = np.prod(shape_fixture) cparams = {"clevel": 0, "codec": blosc2.Codec.LZ4} # Compression parameters @@ -436,10 +442,7 @@ def test_functions(function, dtype_fixture, shape_fixture): np.testing.assert_allclose(expr[()], res_numexpr, rtol=1e-5) -@pytest.mark.parametrize( - "urlpath", - ["arr.b2nd", None], -) +@pytest.mark.parametrize("urlpath", [None, pytest.param("arr.b2nd", marks=pytest.mark.heavy)]) @pytest.mark.parametrize( "function", ["arctan2", "**"], @@ -784,15 +787,15 @@ def test_save_unsafe(): [ "sin", "sqrt", - "cosh", "arctan", - "arcsinh", "exp", - "expm1", "log", "conj", "real", "imag", + pytest.param("cosh", marks=pytest.mark.heavy), + pytest.param("arcsinh", marks=pytest.mark.heavy), + pytest.param("expm1", marks=pytest.mark.heavy), ], ) def test_save_functions(function, dtype_fixture, shape_fixture): @@ -911,10 +914,19 @@ def test_save_many_functions(dtype_fixture, shape_fixture): @pytest.mark.skipif(blosc2.IS_WASM, reason="This test is not supported in WASM") @pytest.mark.parametrize( - "constructor", ["arange", "linspace", "fromiter", "reshape", "zeros", "ones", "full"] + "constructor", + [ + "arange", + "linspace", + "reshape", + "zeros", + "ones", + pytest.param("fromiter", marks=pytest.mark.heavy), + pytest.param("full", marks=pytest.mark.heavy), + ], ) -@pytest.mark.parametrize("shape", [(10,), (10, 10), (10, 10, 10)]) -@pytest.mark.parametrize("dtype", ["int32", "float64", "i2"]) +@pytest.mark.parametrize("shape", [(10,), (10, 10), pytest.param((10, 10, 10), marks=pytest.mark.heavy)]) +@pytest.mark.parametrize("dtype", ["int32", "float64", 
pytest.param("i2", marks=pytest.mark.heavy)]) @pytest.mark.parametrize("disk", [True, False]) def test_save_constructor(disk, shape, dtype, constructor): lshape = math.prod(shape) @@ -1129,15 +1141,7 @@ def test_broadcasting_str(broadcast_fixture): np.testing.assert_allclose(res, nres) -@pytest.mark.parametrize( - "operand_mix", - [ - ("NDArray", "numpy"), - ("NDArray", "NDArray"), - ("numpy", "NDArray"), - ("numpy", "numpy"), - ], -) +@pytest.mark.parametrize("operand_mix", _LAZYEXPR_OPERAND_MIXES) @pytest.mark.parametrize("operand_guess", [True, False]) def test_lazyexpr(array_fixture, operand_mix, operand_guess): a1, a2, a3, a4, na1, na2, na3, na4 = array_fixture @@ -1179,15 +1183,7 @@ def test_lazyexpr(array_fixture, operand_mix, operand_guess): np.testing.assert_allclose(res, nres[0:10:2]) -@pytest.mark.parametrize( - "operand_mix", - [ - ("NDArray", "numpy"), - ("NDArray", "NDArray"), - ("numpy", "NDArray"), - ("numpy", "numpy"), - ], -) +@pytest.mark.parametrize("operand_mix", _LAZYEXPR_OPERAND_MIXES) @pytest.mark.parametrize( "out_param", ["NDArray", "numpy"], @@ -1405,38 +1401,35 @@ def test_get_expr_operands(expression, expected_operands): "scalar", [ "np.int8(0)", - "np.uint8(0)", - "np.int16(0)", - "np.uint16(0)", - "np.int32(0)", - "np.uint32(0)", - "np.int64(0)", "np.float32(0)", "np.float64(0)", "np.complex64(0)", - "np.complex128(0)", + pytest.param("np.uint8(0)", marks=pytest.mark.heavy), + pytest.param("np.int16(0)", marks=pytest.mark.heavy), + pytest.param("np.uint16(0)", marks=pytest.mark.heavy), + pytest.param("np.int32(0)", marks=pytest.mark.heavy), + pytest.param("np.uint32(0)", marks=pytest.mark.heavy), + pytest.param("np.int64(0)", marks=pytest.mark.heavy), + pytest.param("np.complex128(0)", marks=pytest.mark.heavy), ], ) @pytest.mark.parametrize( ("dtype1", "dtype2"), [ (np.int8, np.int8), - (np.int8, np.int16), - (np.int8, np.int32), - (np.int8, np.int64), (np.int8, np.float32), - (np.int8, np.float64), - (np.uint16, np.uint16), 
(np.uint16, np.uint32), - # (np.uint16, np.uint64), # numexpr does not support uint64 - (np.uint16, np.float32), - # (np.uint16, np.float64), - # (np.int32, np.int32), - (np.int32, np.int64), - (np.float32, np.float32), (np.float32, np.float64), - (np.complex64, np.complex64), (np.complex64, np.complex128), + pytest.param(np.int8, np.int16, marks=pytest.mark.heavy), + pytest.param(np.int8, np.int32, marks=pytest.mark.heavy), + pytest.param(np.int8, np.int64, marks=pytest.mark.heavy), + pytest.param(np.int8, np.float64, marks=pytest.mark.heavy), + pytest.param(np.uint16, np.uint16, marks=pytest.mark.heavy), + pytest.param(np.uint16, np.float32, marks=pytest.mark.heavy), + pytest.param(np.int32, np.int64, marks=pytest.mark.heavy), + pytest.param(np.float32, np.float32, marks=pytest.mark.heavy), + pytest.param(np.complex64, np.complex64, marks=pytest.mark.heavy), ], ) def test_dtype_infer(dtype1, dtype2, scalar): diff --git a/tests/ndarray/test_reductions.py b/tests/ndarray/test_reductions.py index 7839d84b..ee1f738a 100644 --- a/tests/ndarray/test_reductions.py +++ b/tests/ndarray/test_reductions.py @@ -16,6 +16,52 @@ NITEMS_SMALL = 1000 NITEMS = 10_000 +_FAST_REDUCTION_OPS = [ + "sum", + "prod", + "min", + "max", + "any", + "mean", + "argmax", + "cumulative_sum", + pytest.param("all", marks=pytest.mark.heavy), + pytest.param("std", marks=pytest.mark.heavy), + pytest.param("var", marks=pytest.mark.heavy), + pytest.param("argmin", marks=pytest.mark.heavy), + pytest.param("cumulative_prod", marks=pytest.mark.heavy), +] + +_SAVE_REDUCTION_OPS = [ + "sum", + "prod", + "min", + "mean", + "argmax", + "cumulative_sum", + pytest.param("max", marks=pytest.mark.heavy), + pytest.param("any", marks=pytest.mark.heavy), + pytest.param("all", marks=pytest.mark.heavy), + pytest.param("std", marks=pytest.mark.heavy), + pytest.param("var", marks=pytest.mark.heavy), + pytest.param("argmin", marks=pytest.mark.heavy), + pytest.param("cumulative_prod", marks=pytest.mark.heavy), +] + 
+_MINIEXPR_REDUCTION_OPS = [ + "sum", + "prod", + "min", + "mean", + "argmax", + pytest.param("max", marks=pytest.mark.heavy), + pytest.param("any", marks=pytest.mark.heavy), + pytest.param("all", marks=pytest.mark.heavy), + pytest.param("std", marks=pytest.mark.heavy), + pytest.param("var", marks=pytest.mark.heavy), + pytest.param("argmin", marks=pytest.mark.heavy), +] + @pytest.fixture(params=[np.float32, np.float64]) def dtype_fixture(request): @@ -189,7 +235,19 @@ def test_reduce_params(array_fixture, axis, keepdims, dtype_out, reduce_op, kwar # TODO: "prod" is not supported here because it overflows with current values @pytest.mark.parametrize( "reduce_op", - ["cumulative_sum", "sum", "min", "max", "mean", "std", "var", "any", "all", "argmax", "argmin"], + [ + "cumulative_sum", + "sum", + "min", + "mean", + "argmax", + pytest.param("max", marks=pytest.mark.heavy), + pytest.param("std", marks=pytest.mark.heavy), + pytest.param("var", marks=pytest.mark.heavy), + pytest.param("any", marks=pytest.mark.heavy), + pytest.param("all", marks=pytest.mark.heavy), + pytest.param("argmin", marks=pytest.mark.heavy), + ], ) @pytest.mark.parametrize("axis", [None, 0, 1]) def test_reduce_expr_arr(array_fixture, axis, reduce_op): @@ -223,23 +281,7 @@ def test_reduce_expr_arr(array_fixture, axis, reduce_op): # Test broadcasting -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "mean", - "std", - "var", - "min", - "max", - "any", - "all", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) +@pytest.mark.parametrize("reduce_op", _FAST_REDUCTION_OPS) @pytest.mark.parametrize("axis", [0, (0, 1), None]) @pytest.mark.parametrize("keepdims", [True, False]) @pytest.mark.parametrize( @@ -247,7 +289,7 @@ def test_reduce_expr_arr(array_fixture, axis, reduce_op): [ ((5, 5, 5), (5, 5), (5,)), ((10, 10, 10), (10, 10), (10,)), - ((100, 100, 100), (100, 100), (100,)), + pytest.param(((100, 100, 100), (100, 100), (100,)), marks=pytest.mark.heavy), ], ) def 
test_broadcast_params(axis, keepdims, reduce_op, shapes): @@ -393,32 +435,15 @@ def test_reduce_slice(reduce_op): [ ((10, 50, 70), (10, 25, 50)), ((20, 50, 100), (10, 50, 100)), - ((10, 50, 100), (6, 25, 75)), - ((15, 30, 75), (7, 20, 50)), - ((1, 50, 100), (1, 50, 60)), + pytest.param((10, 50, 100), (6, 25, 75), marks=pytest.mark.heavy), + pytest.param((15, 30, 75), (7, 20, 50), marks=pytest.mark.heavy), + pytest.param((1, 50, 100), (1, 50, 60), marks=pytest.mark.heavy), ], ) @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [1, 0, 0.32]) -@pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], -) -@pytest.mark.parametrize("axis", [None, 0, 1]) +@pytest.mark.parametrize("fill_value", [1, 0, pytest.param(0.32, marks=pytest.mark.heavy)]) +@pytest.mark.parametrize("reduce_op", _FAST_REDUCTION_OPS) +@pytest.mark.parametrize("axis", [None, 0, pytest.param(1, marks=pytest.mark.heavy)]) def test_fast_path(chunks, blocks, disk, fill_value, reduce_op, axis): shape = (20, 50, 100) urlpath = "a1.b2nd" if disk else None @@ -455,15 +480,13 @@ def test_fast_path(chunks, blocks, disk, fill_value, reduce_op, axis): ("chunks", "blocks"), [ ((2, 5, 10), (1, 5, 10)), - ((1, 3, 7), (1, 3, 5)), - ((5, 6, 10), (3, 3, 7)), + pytest.param((1, 3, 7), (1, 3, 5), marks=pytest.mark.heavy), + pytest.param((5, 6, 10), (3, 3, 7), marks=pytest.mark.heavy), ], ) @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) -@pytest.mark.parametrize( - "reduce_op", ["sum", "prod", "min", "max", "any", "all", "mean", "std", "var", "argmax", "argmin"] -) +@pytest.mark.parametrize("fill_value", [0, 1, pytest.param(0.32, marks=pytest.mark.heavy)]) +@pytest.mark.parametrize("reduce_op", _MINIEXPR_REDUCTION_OPS) def test_miniexpr_slice(chunks, blocks, disk, fill_value, reduce_op): shape 
= (10, 10, 12) axis = None @@ -486,26 +509,11 @@ def test_miniexpr_slice(chunks, blocks, disk, fill_value, reduce_op): @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) @pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], + "fill_value", [1, pytest.param(0, marks=pytest.mark.heavy), pytest.param(0.32, marks=pytest.mark.heavy)] ) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) +@pytest.mark.parametrize("reduce_op", _SAVE_REDUCTION_OPS) +@pytest.mark.parametrize("axis", [0, None, pytest.param((0, 1), marks=pytest.mark.heavy)]) def test_save_version1(disk, fill_value, reduce_op, axis): shape = (20, 50, 100) if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): @@ -547,26 +555,11 @@ def test_save_version1(disk, fill_value, reduce_op, axis): @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) @pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], + "fill_value", [1, pytest.param(0, marks=pytest.mark.heavy), pytest.param(0.32, marks=pytest.mark.heavy)] ) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) +@pytest.mark.parametrize("reduce_op", _SAVE_REDUCTION_OPS) +@pytest.mark.parametrize("axis", [0, None, pytest.param((0, 1), marks=pytest.mark.heavy)]) def test_save_version2(disk, fill_value, reduce_op, axis): shape = (20, 50, 100) if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): @@ -607,26 +600,11 @@ def test_save_version2(disk, fill_value, reduce_op, axis): @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) @pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - 
"max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], + "fill_value", [1, pytest.param(0, marks=pytest.mark.heavy), pytest.param(0.32, marks=pytest.mark.heavy)] ) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) +@pytest.mark.parametrize("reduce_op", _SAVE_REDUCTION_OPS) +@pytest.mark.parametrize("axis", [0, None, pytest.param((0, 1), marks=pytest.mark.heavy)]) def test_save_version3(disk, fill_value, reduce_op, axis): shape = (20, 50, 100) if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): @@ -667,26 +645,11 @@ def test_save_version3(disk, fill_value, reduce_op, axis): @pytest.mark.parametrize("disk", [True, False]) -@pytest.mark.parametrize("fill_value", [0, 1, 0.32]) @pytest.mark.parametrize( - "reduce_op", - [ - "sum", - "prod", - "min", - "max", - "any", - "all", - "mean", - "std", - "var", - "argmax", - "argmin", - "cumulative_sum", - "cumulative_prod", - ], + "fill_value", [1, pytest.param(0, marks=pytest.mark.heavy), pytest.param(0.32, marks=pytest.mark.heavy)] ) -@pytest.mark.parametrize("axis", [0, (0, 1), None]) +@pytest.mark.parametrize("reduce_op", _SAVE_REDUCTION_OPS) +@pytest.mark.parametrize("axis", [0, None, pytest.param((0, 1), marks=pytest.mark.heavy)]) def test_save_version4(disk, fill_value, reduce_op, axis): if reduce_op in ("argmax", "argmin", "cumulative_sum", "cumulative_prod"): axis = 1 if isinstance(axis, tuple) else axis diff --git a/tests/test_batch_array.py b/tests/test_batch_array.py index 6ac5fa79..04254e79 100644 --- a/tests/test_batch_array.py +++ b/tests/test_batch_array.py @@ -380,17 +380,24 @@ def test_batcharray_respects_explicit_use_dict_and_non_zstd(): assert barray.cparams.use_dict is False -def test_batcharray_guess_items_per_block_uses_l1_for_clevel_5(monkeypatch): +def test_batcharray_guess_items_per_block_uses_l1_for_low_clevel(monkeypatch): monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 100) 
monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 1000) - barray = blosc2.BatchArray(cparams={"clevel": 5}) + barray = blosc2.BatchArray(cparams={"clevel": 3}) assert barray._guess_blocksize([30, 30, 30, 30]) == 3 -def test_batcharray_guess_items_per_block_uses_l2_for_mid_clevel(monkeypatch): +def test_batcharray_guess_items_per_block_uses_half_l2_for_default_clevel(monkeypatch): + monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 100) + monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 150) + barray = blosc2.BatchArray(cparams={"clevel": 5}) + assert barray._guess_blocksize([60, 60, 60, 60]) == 1 + + +def test_batcharray_guess_items_per_block_uses_l2_for_high_clevel(monkeypatch): monkeypatch.setitem(blosc2.cpu_info, "l1_data_cache_size", 100) monkeypatch.setitem(blosc2.cpu_info, "l2_cache_size", 150) - barray = blosc2.BatchArray(cparams={"clevel": 6}) + barray = blosc2.BatchArray(cparams={"clevel": 7}) assert barray._guess_blocksize([60, 60, 60, 60]) == 2