Skip to content

Commit 4aea8f7

Browse files
branchseer and claude committed
feat(cache): add explicit inputs config for cache fingerprinting
Add `inputs` field to task configuration supporting:
- Explicit glob patterns: `inputs: ["src/**/*.ts"]`
- Auto-inference from fspy: `inputs: [{ auto: true }]`
- Negative patterns: `inputs: ["src/**", "!**/*.test.ts"]`
- Mixed mode: `inputs: ["package.json", { auto: true }, "!dist/**"]`
- Empty array to disable file tracking: `inputs: []`

Key changes:
- Add `ResolvedInputConfig` to parse and normalize user input config
- Add `glob_inputs.rs` for walking glob patterns and hashing files
- Update `PreRunFingerprint` to include `input_config` and `glob_base`
- Bump cache DB version to 6 for new fingerprint structure
- Add comprehensive e2e tests for all input combinations

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent af0900f commit 4aea8f7

46 files changed

Lines changed: 2059 additions & 141 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

Cargo.lock

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/vite_task/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,10 @@ vite_str = { workspace = true }
4040
vite_task_graph = { workspace = true }
4141
vite_task_plan = { workspace = true }
4242
vite_workspace = { workspace = true }
43+
wax = { workspace = true }
44+
45+
[dev-dependencies]
46+
tempfile = { workspace = true }
4347

4448
[target.'cfg(unix)'.dependencies]
4549
nix = { workspace = true }

crates/vite_task/src/session/cache/display.rs

Lines changed: 0 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -40,11 +40,6 @@ pub enum SpawnFingerprintChange {
4040
/// Working directory changed
4141
CwdChanged,
4242

43-
// Fingerprint ignores changes
44-
/// Fingerprint ignore pattern added
45-
FingerprintIgnoreAdded { pattern: Str },
46-
/// Fingerprint ignore pattern removed
47-
FingerprintIgnoreRemoved { pattern: Str },
4843
}
4944

5045
/// Format a single spawn fingerprint change as human-readable text.
@@ -70,12 +65,6 @@ pub fn format_spawn_change(change: &SpawnFingerprintChange) -> Str {
7065
SpawnFingerprintChange::ProgramChanged => Str::from("program changed"),
7166
SpawnFingerprintChange::ArgsChanged => Str::from("args changed"),
7267
SpawnFingerprintChange::CwdChanged => Str::from("working directory changed"),
73-
SpawnFingerprintChange::FingerprintIgnoreAdded { pattern } => {
74-
vite_str::format!("fingerprint ignore '{pattern}' added")
75-
}
76-
SpawnFingerprintChange::FingerprintIgnoreRemoved { pattern } => {
77-
vite_str::format!("fingerprint ignore '{pattern}' removed")
78-
}
7968
}
8069
}
8170

@@ -141,20 +130,6 @@ pub fn detect_spawn_fingerprint_changes(
141130
changes.push(SpawnFingerprintChange::CwdChanged);
142131
}
143132

144-
// Check fingerprint ignores changes
145-
let old_ignores: FxHashSet<_> =
146-
old.fingerprint_ignores().map(|v| v.iter().collect()).unwrap_or_default();
147-
let new_ignores: FxHashSet<_> =
148-
new.fingerprint_ignores().map(|v| v.iter().collect()).unwrap_or_default();
149-
for pattern in old_ignores.difference(&new_ignores) {
150-
changes
151-
.push(SpawnFingerprintChange::FingerprintIgnoreRemoved { pattern: (*pattern).clone() });
152-
}
153-
for pattern in new_ignores.difference(&old_ignores) {
154-
changes
155-
.push(SpawnFingerprintChange::FingerprintIgnoreAdded { pattern: (*pattern).clone() });
156-
}
157-
158133
changes
159134
}
160135

@@ -196,10 +171,6 @@ pub fn format_cache_status_inline(cache_status: &CacheStatus) -> Option<Str> {
196171
Some(SpawnFingerprintChange::ProgramChanged) => "program changed",
197172
Some(SpawnFingerprintChange::ArgsChanged) => "args changed",
198173
Some(SpawnFingerprintChange::CwdChanged) => "working directory changed",
199-
Some(
200-
SpawnFingerprintChange::FingerprintIgnoreAdded { .. }
201-
| SpawnFingerprintChange::FingerprintIgnoreRemoved { .. },
202-
) => "fingerprint ignores changed",
203174
None => "configuration changed",
204175
}
205176
}

crates/vite_task/src/session/cache/mod.rs

Lines changed: 105 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
33
pub mod display;
44

5-
use std::{fmt::Display, fs::File, io::Write, sync::Arc, time::Duration};
5+
use std::{collections::BTreeMap, fmt::Display, fs::File, io::Write, sync::Arc, time::Duration};
66

77
use bincode::{Decode, Encode, decode_from_slice, encode_to_vec};
88
// Re-export display functions for convenience
@@ -11,14 +11,32 @@ pub use display::{SpawnFingerprintChange, detect_spawn_fingerprint_changes, form
1111
use rusqlite::{Connection, OptionalExtension as _, config::DbConfig};
1212
use serde::{Deserialize, Serialize};
1313
use tokio::sync::Mutex;
14-
use vite_path::AbsolutePath;
14+
use vite_path::{AbsolutePath, RelativePathBuf};
15+
use vite_task_graph::config::ResolvedInputConfig;
1516
use vite_task_plan::cache_metadata::{CacheMetadata, ExecutionCacheKey, SpawnFingerprint};
1617

1718
use super::execute::{
1819
fingerprint::{PostRunFingerprint, PostRunFingerprintMismatch},
1920
spawn::StdOutput,
2021
};
2122

23+
/// Pre-run fingerprint computed at execution time.
24+
/// Contains spawn fingerprint, input configuration, and explicit input file hashes.
25+
#[derive(Debug, Encode, Decode, Serialize, PartialEq, Eq, Clone)]
26+
pub struct PreRunFingerprint {
27+
/// The spawn fingerprint (command, args, cwd, envs)
28+
pub spawn_fingerprint: SpawnFingerprint,
29+
/// Resolved input configuration that affects cache behavior.
30+
pub input_config: ResolvedInputConfig,
31+
/// Base directory for glob patterns, relative to workspace root.
32+
/// This is where the task is defined (package path).
33+
pub glob_base: RelativePathBuf,
34+
/// Hashes of explicit input files computed from positive globs.
35+
/// Files matching negative globs are already filtered out.
36+
/// Path is relative to workspace root, value is xxHash3_64 of file content.
37+
pub globbed_inputs: BTreeMap<RelativePathBuf, u64>,
38+
}
39+
2240
/// Command cache value, for validating post-run fingerprint after the spawn fingerprint is matched,
2341
/// and replaying the std outputs if validated.
2442
#[derive(Debug, Encode, Decode, Serialize)]
@@ -98,7 +116,7 @@ impl ExecutionCache {
98116
0 => {
99117
// fresh new db
100118
conn.execute(
101-
"CREATE TABLE spawn_fingerprint_cache (key BLOB PRIMARY KEY, value BLOB);",
119+
"CREATE TABLE pre_run_fingerprint_cache (key BLOB PRIMARY KEY, value BLOB);",
102120
(),
103121
)?;
104122
conn.execute(
@@ -129,27 +147,59 @@ impl ExecutionCache {
129147
Ok(())
130148
}
131149

132-
/// Try to hit cache with spawn fingerprint.
150+
/// Try to hit cache with pre-run fingerprint (spawn + globbed inputs).
133151
/// Returns `Ok(Ok(cache_value))` on cache hit, `Ok(Err(cache_miss))` on miss.
152+
///
153+
/// # Arguments
154+
/// * `cache_metadata` - Cache metadata from plan stage
155+
/// * `globbed_inputs` - Hashes of explicit input files computed from positive globs
156+
/// * `base_dir` - Workspace root for validating post-run fingerprint
134157
#[tracing::instrument(level = "debug", skip_all)]
135158
pub async fn try_hit(
136159
&self,
137160
cache_metadata: &CacheMetadata,
161+
globbed_inputs: BTreeMap<RelativePathBuf, u64>,
138162
base_dir: &AbsolutePath,
139163
) -> anyhow::Result<Result<CommandCacheValue, CacheMiss>> {
140164
let spawn_fingerprint = &cache_metadata.spawn_fingerprint;
141165
let execution_cache_key = &cache_metadata.execution_cache_key;
166+
let input_config = &cache_metadata.input_config;
167+
168+
// Convert absolute glob_base to relative for cache key
169+
let glob_base = cache_metadata
170+
.glob_base
171+
.strip_prefix(base_dir)
172+
.map_err(|e| anyhow::anyhow!("failed to strip prefix from glob_base: {e}"))?
173+
.ok_or_else(|| {
174+
anyhow::anyhow!(
175+
"glob_base {:?} is not inside workspace {:?}",
176+
cache_metadata.glob_base,
177+
base_dir
178+
)
179+
})?;
142180

143-
// Try to directly find the cache by spawn fingerprint first
144-
if let Some(cache_value) = self.get_by_spawn_fingerprint(spawn_fingerprint).await? {
145-
// Validate post-run fingerprint
146-
if let Some(post_run_fingerprint_mismatch) =
147-
cache_value.post_run_fingerprint.validate(base_dir)?
148-
{
149-
// Found the cache with the same spawn fingerprint, but the post-run fingerprint mismatches
150-
return Ok(Err(CacheMiss::FingerprintMismatch(
151-
FingerprintMismatch::PostRunFingerprintMismatch(post_run_fingerprint_mismatch),
152-
)));
181+
// Build pre-run fingerprint combining spawn fingerprint, input config, and globbed inputs
182+
let pre_run_fingerprint = PreRunFingerprint {
183+
spawn_fingerprint: spawn_fingerprint.clone(),
184+
input_config: input_config.clone(),
185+
glob_base,
186+
globbed_inputs,
187+
};
188+
189+
// Try to directly find the cache by pre-run fingerprint first
190+
if let Some(cache_value) = self.get_by_pre_run_fingerprint(&pre_run_fingerprint).await? {
191+
// Validate post-run fingerprint (inferred inputs) only if auto inference is enabled
192+
if input_config.includes_auto {
193+
if let Some(post_run_fingerprint_mismatch) =
194+
cache_value.post_run_fingerprint.validate(base_dir)?
195+
{
196+
// Found the cache with the same pre-run fingerprint, but the post-run fingerprint mismatches
197+
return Ok(Err(CacheMiss::FingerprintMismatch(
198+
FingerprintMismatch::PostRunFingerprintMismatch(
199+
post_run_fingerprint_mismatch,
200+
),
201+
)));
202+
}
153203
}
154204
// Associate the execution key to the spawn fingerprint if not already,
155205
// so that next time we can find it and report spawn fingerprint mismatch
@@ -158,8 +208,8 @@ impl ExecutionCache {
158208
return Ok(Ok(cache_value));
159209
}
160210

161-
// No cache found with the current spawn fingerprint,
162-
// check if execution key maps to different fingerprint
211+
// No cache found with the current pre-run fingerprint,
212+
// check if execution key maps to different spawn fingerprint
163213
if let Some(old_spawn_fingerprint) =
164214
self.get_fingerprint_by_execution_key(execution_cache_key).await?
165215
{
@@ -177,16 +227,46 @@ impl ExecutionCache {
177227
}
178228

179229
/// Update cache after successful execution.
230+
///
231+
/// # Arguments
232+
/// * `cache_metadata` - Cache metadata from plan stage
233+
/// * `globbed_inputs` - Hashes of explicit input files computed from positive globs
234+
/// * `base_dir` - Workspace root for converting absolute paths to relative
235+
/// * `cache_value` - The cache value to store (outputs and post-run fingerprint)
180236
#[tracing::instrument(level = "debug", skip_all)]
181237
pub async fn update(
182238
&self,
183239
cache_metadata: &CacheMetadata,
240+
globbed_inputs: BTreeMap<RelativePathBuf, u64>,
241+
base_dir: &AbsolutePath,
184242
cache_value: CommandCacheValue,
185243
) -> anyhow::Result<()> {
186244
let spawn_fingerprint = &cache_metadata.spawn_fingerprint;
187245
let execution_cache_key = &cache_metadata.execution_cache_key;
246+
let input_config = &cache_metadata.input_config;
247+
248+
// Convert absolute glob_base to relative for cache key
249+
let glob_base = cache_metadata
250+
.glob_base
251+
.strip_prefix(base_dir)
252+
.map_err(|e| anyhow::anyhow!("failed to strip prefix from glob_base: {e}"))?
253+
.ok_or_else(|| {
254+
anyhow::anyhow!(
255+
"glob_base {:?} is not inside workspace {:?}",
256+
cache_metadata.glob_base,
257+
base_dir
258+
)
259+
})?;
260+
261+
// Build pre-run fingerprint combining spawn fingerprint, input config, and globbed inputs
262+
let pre_run_fingerprint = PreRunFingerprint {
263+
spawn_fingerprint: spawn_fingerprint.clone(),
264+
input_config: input_config.clone(),
265+
glob_base,
266+
globbed_inputs,
267+
};
188268

189-
self.upsert_spawn_fingerprint_cache(spawn_fingerprint, &cache_value).await?;
269+
self.upsert_pre_run_fingerprint_cache(&pre_run_fingerprint, &cache_value).await?;
190270
self.upsert_execution_key_to_fingerprint(execution_cache_key, spawn_fingerprint).await?;
191271
Ok(())
192272
}
@@ -227,11 +307,11 @@ impl ExecutionCache {
227307
Ok(Some(value))
228308
}
229309

230-
async fn get_by_spawn_fingerprint(
310+
async fn get_by_pre_run_fingerprint(
231311
&self,
232-
spawn_fingerprint: &SpawnFingerprint,
312+
pre_run_fingerprint: &PreRunFingerprint,
233313
) -> anyhow::Result<Option<CommandCacheValue>> {
234-
self.get_key_by_value("spawn_fingerprint_cache", spawn_fingerprint).await
314+
self.get_key_by_value("pre_run_fingerprint_cache", pre_run_fingerprint).await
235315
}
236316

237317
async fn get_fingerprint_by_execution_key(
@@ -266,12 +346,12 @@ impl ExecutionCache {
266346
Ok(())
267347
}
268348

269-
async fn upsert_spawn_fingerprint_cache(
349+
async fn upsert_pre_run_fingerprint_cache(
270350
&self,
271-
spawn_fingerprint: &SpawnFingerprint,
351+
pre_run_fingerprint: &PreRunFingerprint,
272352
cache_value: &CommandCacheValue,
273353
) -> anyhow::Result<()> {
274-
self.upsert("spawn_fingerprint_cache", spawn_fingerprint, cache_value).await
354+
self.upsert("pre_run_fingerprint_cache", pre_run_fingerprint, cache_value).await
275355
}
276356

277357
async fn upsert_execution_key_to_fingerprint(
@@ -320,8 +400,8 @@ impl ExecutionCache {
320400
&mut out,
321401
)
322402
.await?;
323-
out.write_all(b"------- spawn_fingerprint_cache -------\n")?;
324-
self.list_table::<SpawnFingerprint, CommandCacheValue>("spawn_fingerprint_cache", &mut out)
403+
out.write_all(b"------- pre_run_fingerprint_cache -------\n")?;
404+
self.list_table::<PreRunFingerprint, CommandCacheValue>("pre_run_fingerprint_cache", &mut out)
325405
.await?;
326406
Ok(())
327407
}

crates/vite_task/src/session/execute/fingerprint.rs

Lines changed: 24 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -16,6 +16,7 @@ use serde::{Deserialize, Serialize};
1616
use vite_glob::GlobPatternSet;
1717
use vite_path::{AbsolutePath, RelativePathBuf};
1818
use vite_str::Str;
19+
use vite_task_graph::config::ResolvedInputConfig;
1920

2021
use super::spawn::PathRead;
2122
use crate::collections::HashMap;
@@ -24,8 +25,9 @@ use crate::collections::HashMap;
2425
/// Used to validate whether cached outputs are still valid.
2526
#[derive(Encode, Decode, Debug, Serialize)]
2627
pub struct PostRunFingerprint {
27-
/// Paths accessed during execution with their content fingerprints
28-
pub inputs: HashMap<RelativePathBuf, PathFingerprint>,
28+
/// Paths inferred from fspy during execution with their content fingerprints.
29+
/// Only populated when `input_config.includes_auto` is true.
30+
pub inferred_inputs: HashMap<RelativePathBuf, PathFingerprint>,
2931
}
3032

3133
/// Fingerprint for a single path (file or directory)
@@ -70,26 +72,30 @@ impl PostRunFingerprint {
7072
/// Creates a new fingerprint from path accesses after task execution.
7173
///
7274
/// # Arguments
73-
/// * `path_reads` - Map of paths that were read during execution
75+
/// * `path_reads` - Map of paths that were read during execution (from fspy)
7476
/// * `base_dir` - Workspace root for resolving relative paths
75-
/// * `fingerprint_ignores` - Optional glob patterns to exclude from fingerprinting
77+
/// * `input_config` - Resolved input configuration controlling what to fingerprint
7678
#[tracing::instrument(level = "debug", skip_all, name = "create_post_run_fingerprint")]
7779
pub fn create(
7880
path_reads: &HashMap<RelativePathBuf, PathRead>,
7981
base_dir: &AbsolutePath,
80-
fingerprint_ignores: Option<&[Str]>,
82+
input_config: &ResolvedInputConfig,
8183
) -> anyhow::Result<Self> {
82-
// Build ignore matcher from patterns if provided
83-
let ignore_matcher = fingerprint_ignores
84-
.filter(|patterns| !patterns.is_empty())
85-
.map(GlobPatternSet::new)
86-
.transpose()?;
84+
// If inference is disabled, return empty inferred_inputs
85+
if input_config.inference_disabled() {
86+
return Ok(Self { inferred_inputs: HashMap::default() });
87+
}
88+
89+
// Build negative pattern matcher for filtering inferred inputs
90+
let patterns: Vec<Str> = input_config.negative_globs.iter().cloned().collect();
91+
let negative_matcher =
92+
if patterns.is_empty() { None } else { Some(GlobPatternSet::new(&patterns)?) };
8793

88-
let inputs = path_reads
94+
let inferred_inputs = path_reads
8995
.par_iter()
9096
.filter(|(path, _)| {
91-
// Apply ignore patterns if present
92-
ignore_matcher.as_ref().is_none_or(|matcher| !matcher.is_match(path.as_str()))
97+
// Apply negative patterns to exclude from inferred inputs
98+
negative_matcher.as_ref().is_none_or(|matcher| !matcher.is_match(path.as_str()))
9399
})
94100
.map(|(relative_path, path_read)| {
95101
let full_path = Arc::<AbsolutePath>::from(base_dir.join(relative_path));
@@ -98,7 +104,7 @@ impl PostRunFingerprint {
98104
})
99105
.collect::<anyhow::Result<HashMap<_, _>>>()?;
100106

101-
Ok(Self { inputs })
107+
Ok(Self { inferred_inputs })
102108
}
103109

104110
/// Validates the fingerprint against current filesystem state.
@@ -108,8 +114,10 @@ impl PostRunFingerprint {
108114
&self,
109115
base_dir: &AbsolutePath,
110116
) -> anyhow::Result<Option<PostRunFingerprintMismatch>> {
111-
let input_mismatch =
112-
self.inputs.par_iter().find_map_any(|(input_relative_path, path_fingerprint)| {
117+
let input_mismatch = self
118+
.inferred_inputs
119+
.par_iter()
120+
.find_map_any(|(input_relative_path, path_fingerprint)| {
113121
let input_full_path = Arc::<AbsolutePath>::from(base_dir.join(input_relative_path));
114122
let path_read = PathRead {
115123
read_dir_entries: matches!(path_fingerprint, PathFingerprint::Folder(Some(_))),

0 commit comments

Comments
 (0)