Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,20 @@
### Added

- New `HybridReader` that composes any primary `Reader` with an in-process `DuckDBReader` for staging. `register()` writes to staging; `execute_sql` routes queries that reference registered names to staging and everything else to the primary. Available behind the existing `duckdb` feature.
- `HybridReader` now caches query results in its staging DuckDB to keep
visualization iteration fast across `DRAW`/`SCALE`/`FACET` tweaks. Cache
hits are sub-millisecond; entries are evicted by TTL (default 300s) and
by an LRU byte-budget (default 512 MB). Tunable via
`HybridReader::with_cache_config(...)` and globally disabled with
`GGSQL_HYBRID_CACHE_DISABLED=1`. The Jupyter kernel adds a
`-- @uncache` meta-command that clears the cache without restarting the
session. The kernel also now emits both `application/vnd.vegalite.v5+json`
and `application/vnd.vegalite.v6+json` MIME payloads, so JupyterLab 4.x
(built-in v5 renderer) and nteract / newer Lab extensions (v6 renderer)
both display visualizations natively without falling back to embedded HTML.
- `Reader::clear_cache()` trait method (default `Ok(())`) — readers without
a cache inherit it as a no-op; `HybridReader` overrides it to drop its
cache tables.
- New `AdbcReader<D: Driver>` for connecting to data sources via
[ADBC](https://arrow.apache.org/adbc/) (Arrow Database Connectivity), behind
a new off-by-default `adbc` feature flag. Generic over any concrete
Expand Down
3 changes: 3 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

88 changes: 85 additions & 3 deletions ggsql-jupyter/src/display.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,15 +100,71 @@ fn format_connection_changed(display_name: &str) -> Value {

/// Format a Vega-Lite visualization as a Jupyter `display_data` payload.
///
/// `spec` is the Vega-Lite specification serialized as a JSON string and
/// `hints` is forwarded to `vegalite_html` when building the HTML fallback.
///
/// On success the returned object carries four representations of the chart
/// under `data` (v6 mime, v5 mime, HTML, plain text), the Positron routing
/// hint under `metadata`, and an empty `transient` object. If `spec` is not
/// valid JSON, the failure is logged and a plain-text-only payload describing
/// the parse error is returned instead.
fn format_vegalite(spec: String, hints: &RenderHints) -> Value {
    // Parse the spec ONCE up front. If it is not valid JSON we cannot
    // produce a meaningful Vega-Lite bundle and would only confuse the
    // notebook frontend by emitting a v5/v6 mime payload that wraps an
    // `{"error": "..."}` placeholder. Return a plain-text error output in
    // that case so the failure surfaces cleanly to the user instead of
    // rendering as a silently broken chart.
    let spec_value: Value = match serde_json::from_str(&spec) {
        Ok(v) => v,
        Err(e) => {
            tracing::error!("Failed to parse Vega-Lite JSON: {}", e);
            return json!({
                "data": {
                    "text/plain": format!("ggsql: invalid Vega-Lite spec: {e}")
                },
                "metadata": {},
                "transient": {}
            });
        }
    };

    let html = vegalite_html(&spec, hints);

    // Rewrite the spec's $schema to v5 for the v5 mime bundle so clients
    // that validate the schema URL against the mime version (notably
    // JupyterLab 4.x's built-in @jupyterlab/vega5-extension) accept it.
    // The two mime payloads are otherwise identical; ggsql's generated
    // specs use core Vega-Lite features that are stable across v5 and v6.
    let mut spec_v5 = spec_value.clone();
    if let Some(obj) = spec_v5.as_object_mut() {
        obj.insert(
            "$schema".to_string(),
            json!("https://vega.github.io/schema/vega-lite/v5.json"),
        );
    }

    // `metadata` and `transient` must each appear exactly once, and
    // `output_location` must live inside `metadata` rather than at the top
    // level of the output object.
    json!({
        "data": {
            // Newer native mime bundle. nteract and newer Lab extensions
            // render this directly. JupyterLab 4.x does NOT have a built-in
            // v6 renderer and will fall through to the v5 bundle below.
            "application/vnd.vegalite.v6+json": spec_value,

            // v5 native mime bundle — JupyterLab 4.x has a built-in
            // renderer for this (no extra extensions, no script execution,
            // no CDN round-trip). Chosen preferentially over text/html.
            "application/vnd.vegalite.v5+json": spec_v5,

            // HTML with embedded vega-embed as a last-resort fallback for
            // clients that lack any native Vega-Lite renderer. Requires
            // notebook trust because it contains a <script> tag, and
            // depends on CDN reachability for vega-embed.
            "text/html": html,

            // Text fallback
            "text/plain": "Vega-Lite visualization".to_string()
        },
        "metadata": {
            // Positron-specific routing hint to send output to the Plots
            // pane. Placed inside `metadata` (not at the top level of the
            // output object) per Jupyter's notebook format schema — a
            // top-level `output_location` fails notebook JSON validation
            // and JupyterLab drops the output silently.
            "output_location": "plot"
        },
        "transient": {}
    })
}

Expand Down Expand Up @@ -387,6 +443,32 @@ mod tests {
assert!(display["data"]["text/plain"].is_string());
}

#[test]
fn format_vegalite_emits_both_v5_and_v6_mime() {
    // Feed in a spec that declares the v6 schema and check that the
    // resulting bundle exposes it under both native Vega-Lite mime types.
    let v6_spec = String::from(
        r#"{"$schema":"https://vega.github.io/schema/vega-lite/v6.json","mark":"point"}"#,
    );
    let bundle = super::format_vegalite(v6_spec, &RenderHints::default());
    let mime_data = bundle.get("data").expect("payload has data");

    let v5_entry = mime_data.get("application/vnd.vegalite.v5+json");
    assert!(v5_entry.is_some(), "v5 mime missing");

    let v6_entry = mime_data.get("application/vnd.vegalite.v6+json");
    assert!(v6_entry.is_some(), "v6 mime missing");

    // The copy served under the v5 mime type must advertise the v5 schema,
    // even though the input spec declared v6.
    let v5_schema = v5_entry
        .unwrap()
        .get("$schema")
        .and_then(|schema| schema.as_str());
    assert_eq!(
        v5_schema,
        Some("https://vega.github.io/schema/vega-lite/v5.json")
    );
}

#[test]
fn test_empty_dataframe_returns_none() {
// DDL statements return DataFrames with 0 columns
Expand Down
38 changes: 38 additions & 0 deletions ggsql-jupyter/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,22 @@ pub fn parse_meta_command(code: &str) -> Option<String> {
.map(|rest| rest.trim().to_string())
}

/// Literal text that introduces the `-- @uncache` meta-command.
const META_UNCACHE_PREFIX: &str = "-- @uncache";

/// Recognise the `-- @uncache` meta-command.
///
/// Leading and trailing whitespace around `code` is ignored. Returns
/// `Some(())` when what remains is the prefix with nothing but whitespace
/// after it, and `None` for any other input (including the prefix followed
/// by further non-whitespace text).
pub fn parse_uncache_meta_command(code: &str) -> Option<()> {
    // `strip_prefix` yields the text after the prefix, or bails out with
    // `None` when the input does not start with it.
    let remainder = code.trim().strip_prefix(META_UNCACHE_PREFIX)?;
    remainder.trim().is_empty().then_some(())
}

/// Query executor maintaining persistent database connection
pub struct QueryExecutor {
reader: Box<dyn Reader + Send>,
Expand Down Expand Up @@ -200,6 +216,12 @@ impl QueryExecutor {
tracing::debug!("Executing query: {} chars", code.len());

// Check for meta-commands first
if parse_uncache_meta_command(code).is_some() {
tracing::info!("Meta-command: clearing cache");
self.reader.clear_cache()?;
return Ok(ExecutionResult::DataFrame(ggsql::DataFrame::empty()));
}

if let Some(uri) = parse_meta_command(code) {
tracing::info!("Meta-command: switching reader to {}", uri);
self.swap_reader(&uri)?;
Expand Down Expand Up @@ -285,6 +307,22 @@ mod tests {
assert_eq!(parse_meta_command("SELECT 1"), None);
}

#[test]
fn uncache_meta_command_parses() {
    // The bare command and the command with trailing whitespace are both
    // recognised; ordinary SQL is not.
    let cases = [
        ("-- @uncache", Some(())),
        ("-- @uncache \n", Some(())),
        ("SELECT 1", None),
    ];
    for (input, expected) in cases {
        assert_eq!(parse_uncache_meta_command(input), expected);
    }
}

#[test]
fn uncache_clears_reader_cache() {
    // duckdb://memory doesn't have HybridReader; clear_cache is a no-op.
    // The point of this test is only that the meta-command is dispatched
    // without error and yields a DataFrame result.
    let mut executor = QueryExecutor::new().unwrap();
    let outcome = executor.execute("-- @uncache").unwrap();
    assert!(matches!(outcome, ExecutionResult::DataFrame(_)));
}

#[test]
fn test_meta_command_switches_reader() {
let mut executor = QueryExecutor::new().unwrap();
Expand Down
3 changes: 3 additions & 0 deletions src/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,9 @@ rand.workspace = true
sprintf = "0.4"
const_format.workspace = true
uuid.workspace = true
sha2 = "0.10"
hex = "0.4"
tracing.workspace = true

[dev-dependencies]
jsonschema = { version = "0.44", default-features = false, features = ["resolve-file"] }
Expand Down
Loading