Skip to content

Commit 758c519

Browse files
branchseer and claude committed
feat: add directory pruning for negative glob patterns in input selection
Move negative glob filtering from fingerprint.rs to spawn.rs so fspy-tracked paths are filtered at the absolute path stage. Refactor glob_inputs.rs to partition globs and walk from cleaned roots, enabling wax's automatic directory pruning for exhaustive negative patterns like !dist/**. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent a6369f5 commit 758c519

4 files changed

Lines changed: 187 additions & 131 deletions

File tree

crates/vite_task/src/session/execute/fingerprint.rs

Lines changed: 9 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,8 @@ use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
1515
use serde::{Deserialize, Serialize};
1616
use vite_path::{AbsolutePath, RelativePathBuf};
1717
use vite_str::Str;
18-
use vite_task_graph::config::ResolvedInputConfig;
1918

20-
use super::{glob_inputs::ResolvedGlob, spawn::PathRead};
19+
use super::spawn::PathRead;
2120
use crate::collections::HashMap;
2221

2322
/// Post-run fingerprint capturing file state after execution.
@@ -70,42 +69,26 @@ impl std::fmt::Display for PostRunFingerprintMismatch {
7069
impl PostRunFingerprint {
7170
/// Creates a new fingerprint from path accesses after task execution.
7271
///
72+
/// Negative glob filtering is done upstream in `spawn_with_tracking`.
73+
/// Paths may contain `..` components from fspy, so this method cleans them
74+
/// before fingerprinting.
75+
///
7376
/// # Arguments
74-
/// * `path_reads` - Map of paths that were read during execution (from fspy)
77+
/// * `inferred_path_reads` - Map of paths that were read during execution (from fspy)
7578
/// * `base_dir` - Workspace root for resolving relative paths
76-
/// * `glob_base` - Package directory where the task is defined (negative globs are relative to this)
77-
/// * `input_config` - Resolved input configuration controlling what to fingerprint
7879
#[tracing::instrument(level = "debug", skip_all, name = "create_post_run_fingerprint")]
7980
pub fn create(
8081
inferred_path_reads: &HashMap<RelativePathBuf, PathRead>,
8182
base_dir: &AbsolutePath,
82-
glob_base: &AbsolutePath,
83-
input_config: &ResolvedInputConfig,
8483
) -> anyhow::Result<Self> {
85-
// If inference is disabled, return empty inferred_inputs
86-
if !input_config.includes_auto {
87-
return Ok(Self { inferred_inputs: HashMap::default() });
88-
}
89-
90-
let negatives: Vec<ResolvedGlob> = input_config
91-
.negative_globs
92-
.iter()
93-
.map(|p| ResolvedGlob::new(p.as_str(), glob_base))
94-
.collect::<anyhow::Result<_>>()?;
95-
9684
let inferred_inputs = inferred_path_reads
9785
.par_iter()
98-
.filter_map(|(relative_path, path_read)| {
86+
.map(|(relative_path, path_read)| {
9987
// Clean the absolute path to normalize `..` from fspy-tracked paths
10088
// (e.g., `packages/sub-pkg/../shared/dist/output.js`).
10189
let cleaned_abs =
10290
path_clean::PathClean::clean(base_dir.join(relative_path).as_path());
10391

104-
// Apply negative globs against the cleaned path
105-
if negatives.iter().any(|neg| neg.matches(&cleaned_abs)) {
106-
return None;
107-
}
108-
10992
// Derive a cleaned workspace-relative key so stored paths are normalized
11093
let clean_key = cleaned_abs
11194
.strip_prefix(base_dir.as_path())
@@ -114,11 +97,8 @@ impl PostRunFingerprint {
11497
.unwrap_or_else(|| relative_path.clone());
11598

11699
let full_path = Arc::<AbsolutePath>::from(base_dir.join(&clean_key));
117-
let fingerprint = match fingerprint_path(&full_path, *path_read) {
118-
Ok(f) => f,
119-
Err(e) => return Some(Err(e)),
120-
};
121-
Some(Ok((clean_key, fingerprint)))
100+
let fingerprint = fingerprint_path(&full_path, *path_read)?;
101+
Ok((clean_key, fingerprint))
122102
})
123103
.collect::<anyhow::Result<HashMap<_, _>>>()?;
124104

crates/vite_task/src/session/execute/glob_inputs.rs

Lines changed: 104 additions & 95 deletions
Original file line number | Diff line number | Diff line change
@@ -10,71 +10,90 @@ use std::{
1010
io::{self, Read},
1111
};
1212

13-
use path_clean::PathClean;
1413
#[cfg(test)]
1514
use vite_path::AbsolutePathBuf;
1615
use vite_path::{AbsolutePath, RelativePathBuf};
1716
use vite_str::Str;
18-
use wax::{Glob, Program as _};
17+
use wax::{Glob, walk::Entry as _};
1918

20-
/// A glob pattern resolved to an absolute base directory.
19+
use super::spawn::ResolvedNegativeGlob;
20+
21+
/// Collect walk entries into the result map, filtering against resolved negatives.
2122
///
22-
/// Uses [`wax::Glob::partition`] to separate the invariant prefix from the
23-
/// wildcard suffix, then resolves the prefix to an absolute path via
24-
/// [`path_clean`] (normalizing components like `..`).
23+
/// Each positive glob is partitioned into an invariant prefix and a variant pattern.
24+
/// The prefix is joined with `base_dir` and cleaned (normalizing `..`) to get the walk root.
25+
/// The variant pattern is then walked from the cleaned root.
2526
///
26-
/// For example, `../shared/src/**` relative to `/ws/packages/app` resolves to:
27-
/// - `resolved_base`: `/ws/packages/shared/src`
28-
/// - `variant`: `Some(Glob("**"))`
29-
#[expect(clippy::disallowed_types, reason = "path_clean returns std::path::PathBuf")]
30-
pub struct ResolvedGlob {
31-
resolved_base: std::path::PathBuf,
32-
variant: Option<Glob<'static>>,
33-
}
34-
35-
impl ResolvedGlob {
36-
/// Resolve a glob pattern relative to `base_dir`.
37-
pub fn new(pattern: &str, base_dir: &AbsolutePath) -> anyhow::Result<Self> {
38-
let glob = Glob::new(pattern)?.into_owned();
39-
let (base_pathbuf, variant) = glob.partition();
40-
let base_str = base_pathbuf.to_str().unwrap_or(".");
41-
let resolved_base = if base_str.is_empty() {
42-
base_dir.as_path().to_path_buf()
43-
} else {
44-
base_dir.join(base_str).as_path().clean()
27+
/// Walk errors for non-existent directories are skipped gracefully.
28+
fn collect_walk_entries(
29+
walk: impl Iterator<Item = Result<wax::walk::GlobEntry, wax::walk::WalkError>>,
30+
workspace_root: &AbsolutePath,
31+
resolved_negatives: &[ResolvedNegativeGlob],
32+
result: &mut BTreeMap<RelativePathBuf, u64>,
33+
) -> anyhow::Result<()> {
34+
use path_clean::PathClean as _;
35+
use wax::Program as _;
36+
37+
for entry in walk {
38+
let entry = match entry {
39+
Ok(entry) => entry,
40+
Err(err) => {
41+
// WalkError -> io::Error preserves the error kind
42+
let io_err: io::Error = err.into();
43+
if io_err.kind() == io::ErrorKind::NotFound {
44+
continue;
45+
}
46+
return Err(io_err.into());
47+
}
4548
};
46-
Ok(Self { resolved_base, variant: variant.map(Glob::into_owned) })
47-
}
49+
if !entry.file_type().is_file() {
50+
continue;
51+
}
52+
53+
// Clean the path to normalize `..` components (from globs like `../shared/src/**`)
54+
let cleaned_path = entry.path().clean();
4855

49-
/// Walk the filesystem and yield matching file paths.
50-
#[expect(clippy::disallowed_types, reason = "yields std::path::PathBuf from wax walker")]
51-
pub fn walk(&self) -> Box<dyn Iterator<Item = std::path::PathBuf> + '_> {
52-
match &self.variant {
53-
Some(variant_glob) => Box::new(
54-
variant_glob
55-
.walk(&self.resolved_base)
56-
.filter_map(Result::ok)
57-
.map(wax::walk::Entry::into_path),
58-
),
59-
None => Box::new(std::iter::once(self.resolved_base.clone())),
56+
// Filter against resolved negatives
57+
if resolved_negatives.iter().any(|(prefix, variant)| {
58+
let Ok(remainder) = cleaned_path.strip_prefix(prefix) else {
59+
return false;
60+
};
61+
variant.as_ref().map_or(remainder.as_os_str().is_empty(), |v| v.is_match(remainder))
62+
}) {
63+
continue;
6064
}
61-
}
6265

63-
/// Check if an absolute path matches this resolved glob.
64-
#[expect(clippy::disallowed_types, reason = "matching against std::path::Path")]
65-
pub fn matches(&self, path: &std::path::Path) -> bool {
66-
path.strip_prefix(&self.resolved_base).ok().is_some_and(|remainder| {
67-
self.variant
68-
.as_ref()
69-
.map_or(remainder.as_os_str().is_empty(), |v| v.is_match(remainder))
70-
})
66+
// Compute path relative to workspace_root for the result
67+
let Some(relative_to_workspace) = cleaned_path
68+
.strip_prefix(workspace_root.as_path())
69+
.ok()
70+
.and_then(|p| RelativePathBuf::new(p).ok())
71+
else {
72+
continue; // Skip if path is outside workspace_root
73+
};
74+
75+
// Hash file content
76+
match hash_file_content(&cleaned_path) {
77+
Ok(hash) => {
78+
result.insert(relative_to_workspace, hash);
79+
}
80+
Err(err) if err.kind() == io::ErrorKind::NotFound => {
81+
// File was deleted between walk and hash, skip it
82+
}
83+
Err(err) => {
84+
return Err(err.into());
85+
}
86+
}
7187
}
88+
Ok(())
7289
}
7390

7491
/// Compute globbed inputs by walking positive glob patterns and filtering with negative patterns.
7592
///
76-
/// Glob patterns may contain `..` to reference files outside the package directory
77-
/// (e.g., `../shared/src/**` to include a sibling package's source files).
93+
/// Each glob is partitioned into an invariant prefix and a variant pattern. The prefix is
94+
/// joined with `base_dir` and cleaned to normalize `..` components, producing the walk root.
95+
/// The variant pattern walks the cleaned root. Negative patterns are resolved the same way
96+
/// and used to filter walked entries by matching against cleaned absolute paths.
7897
///
7998
/// # Arguments
8099
/// * `base_dir` - The package directory where the task is defined (globs are relative to this)
@@ -85,69 +104,59 @@ impl ResolvedGlob {
85104
/// # Returns
86105
/// A sorted map of relative paths (from `workspace_root`) to their content hashes.
87106
/// Only files are included (directories are skipped).
88-
///
89-
/// # Example
90-
/// ```ignore
91-
/// // For a task defined in `packages/foo/` with inputs: ["src/**/*.ts", "!**/*.test.ts"]
92-
/// let inputs = compute_globbed_inputs(
93-
/// &packages_foo_path,
94-
/// &workspace_root,
95-
/// &["src/**/*.ts".into()].into_iter().collect(),
96-
/// &["**/*.test.ts".into()].into_iter().collect(),
97-
/// )?;
98-
/// // Returns: { "packages/foo/src/index.ts" => 0x1234..., ... }
99-
/// ```
100107
pub fn compute_globbed_inputs(
101108
base_dir: &AbsolutePath,
102109
workspace_root: &AbsolutePath,
103110
positive_globs: &std::collections::BTreeSet<Str>,
104111
negative_globs: &std::collections::BTreeSet<Str>,
105112
) -> anyhow::Result<BTreeMap<RelativePathBuf, u64>> {
106-
// If no positive globs, return empty result
113+
use path_clean::PathClean as _;
114+
107115
if positive_globs.is_empty() {
108116
return Ok(BTreeMap::new());
109117
}
110118

111-
let negatives: Vec<ResolvedGlob> = negative_globs
119+
// Resolve negatives: partition + clean to get (absolute_prefix, variant)
120+
let resolved_negatives: Vec<ResolvedNegativeGlob> = negative_globs
112121
.iter()
113-
.map(|p| ResolvedGlob::new(p.as_str(), base_dir))
122+
.map(|p| {
123+
let glob = Glob::new(p.as_str())?.into_owned();
124+
let (prefix, variant) = glob.partition();
125+
let resolved = base_dir.as_path().join(&prefix).clean();
126+
Ok((resolved, variant.map(Glob::into_owned)))
127+
})
114128
.collect::<anyhow::Result<_>>()?;
115129

116130
let mut result = BTreeMap::new();
117131

118132
for pattern in positive_globs {
119-
let resolved = ResolvedGlob::new(pattern.as_str(), base_dir)?;
120-
121-
for absolute_path in resolved.walk() {
122-
// Skip non-files
123-
if !absolute_path.is_file() {
124-
continue;
133+
let pos = Glob::new(pattern.as_str())?.into_owned();
134+
let (pos_prefix, pos_variant) = pos.partition();
135+
let walk_root = base_dir.as_path().join(&pos_prefix).clean();
136+
137+
if let Some(variant_glob) = pos_variant {
138+
if walk_root.is_dir() {
139+
collect_walk_entries(
140+
variant_glob.into_owned().walk(&walk_root),
141+
workspace_root,
142+
&resolved_negatives,
143+
&mut result,
144+
)?;
125145
}
126-
127-
// Apply negative patterns
128-
if negatives.iter().any(|neg| neg.matches(&absolute_path)) {
129-
continue;
130-
}
131-
132-
// Compute path relative to workspace_root for the result
133-
let Some(relative_to_workspace) = absolute_path
134-
.strip_prefix(workspace_root.as_path())
135-
.ok()
136-
.and_then(|p| RelativePathBuf::new(p).ok())
137-
else {
138-
continue; // Skip if path is outside workspace_root
139-
};
140-
141-
// Hash file content
142-
match hash_file_content(&absolute_path) {
143-
Ok(hash) => {
144-
result.insert(relative_to_workspace, hash);
145-
}
146-
Err(err) if err.kind() == io::ErrorKind::NotFound => {
147-
// File was deleted between walk and hash, skip it
148-
}
149-
Err(err) => {
150-
return Err(err.into());
146+
} else {
147+
// Invariant-only glob (specific file path) — hash directly if it exists
148+
if walk_root.is_file()
149+
&& let Some(relative) = walk_root
150+
.strip_prefix(workspace_root.as_path())
151+
.ok()
152+
.and_then(|p| RelativePathBuf::new(p).ok())
153+
{
154+
match hash_file_content(&walk_root) {
155+
Ok(hash) => {
156+
result.insert(relative, hash);
157+
}
158+
Err(err) if err.kind() == io::ErrorKind::NotFound => {}
159+
Err(err) => return Err(err.into()),
151160
}
152161
}
153162
}

0 commit comments

Comments
 (0)