From d71277b2b2d5900c7270547b95562276bc2fc3cf Mon Sep 17 00:00:00 2001 From: Rosy-Glorious Miki Date: Wed, 27 May 2026 14:08:05 -0400 Subject: [PATCH 1/4] fix: returns error when closing call fails - resolves potential bug Signed-off-by: Rosy-Glorious Miki --- pkg/tar-patch/apply.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/tar-patch/apply.go b/pkg/tar-patch/apply.go index 98c9afe..34dd0f4 100644 --- a/pkg/tar-patch/apply.go +++ b/pkg/tar-patch/apply.go @@ -68,7 +68,7 @@ func (f *FilesystemDataSource) SetCurrentFile(file string) error { err := f.currentFile.Close() f.currentFile = nil if err != nil { - return nil + return err } } currentFile, err := os.Open(filepath.Join(f.basePath, file)) From 83bd11da8799e508fab1e7acde71609014455b68 Mon Sep 17 00:00:00 2001 From: Rosy-Glorious Miki Date: Tue, 2 Jun 2026 14:36:03 -0400 Subject: [PATCH 2/4] chore: updates CI to only necessary platforms, and removes patch version for broader compatibility Signed-off-by: Rosy-Glorious Miki --- .github/workflows/ci.yml | 4 ++-- README.md | 2 +- go.mod | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 10fba8d..7329206 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -97,7 +97,7 @@ jobs: runs-on: ${{ matrix.os }} strategy: matrix: - os: [ macos-latest, macos-26-intel ] + os: [ macos-latest] steps: - name: Check out repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 @@ -116,7 +116,7 @@ jobs: CGO_ENABLED: 0 strategy: matrix: - os: [ windows-latest, windows-11-arm ] + os: [ windows-latest] steps: - name: Check out repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 diff --git a/README.md b/README.md index 166fc3e..c7b4853 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ [![CI](https://github.com/containers/tar-diff/actions/workflows/ci.yml/badge.svg)](https://github.com/containers/tar-diff/actions/workflows/ci.yml) -[![Go Version](https://img.shields.io/badge/go-1.25-blue.svg)](https://golang.org/dl/) +[![Go Version](https://img.shields.io/badge/go-1.26-blue.svg)](https://golang.org/dl/) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE) [![codecov](https://codecov.io/gh/containers/tar-diff/graph/badge.svg)](https://codecov.io/gh/containers/tar-diff) diff --git a/go.mod b/go.mod index 40c5698..dc8371b 100644 --- a/go.mod +++ b/go.mod @@ -2,8 +2,6 @@ module github.com/containers/tar-diff go 1.26 -toolchain go1.26.2 - require ( github.com/containers/image/v5 v5.36.2 github.com/klauspost/compress v1.18.5 From 39984c8a3a79ee8c1521430ca665a013c8ede2fb Mon Sep 17 00:00:00 2001 From: Rosy-Glorious Miki Date: Tue, 2 Jun 2026 14:40:29 -0400 Subject: [PATCH 3/4] fix: add a validation function against missing source files before patching the delta Signed-off-by: Rosy-Glorious Miki --- pkg/tar-patch/apply.go | 90 +++++++++++++++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/pkg/tar-patch/apply.go b/pkg/tar-patch/apply.go index 34dd0f4..d545985 100644 --- a/pkg/tar-patch/apply.go +++ b/pkg/tar-patch/apply.go @@ -87,8 +87,96 @@ func (f *FilesystemDataSource) Seek(offset int64, whence int) (int64, error) { return f.currentFile.Seek(offset, whence) } +// validateRequiredFiles scans the delta to collect all required source files and validates they exist. +func validateRequiredFiles(delta io.ReadSeeker, dataSource *FilesystemDataSource) error { + // Read and validate header + buf := make([]byte, len(protocol.DeltaHeader)) + _, err := io.ReadFull(delta, buf) + if err != nil { + return err + } + if !bytes.Equal(buf, protocol.DeltaHeader[:]) { + return fmt.Errorf("invalid delta format") + } + + decoder, err := zstd.NewReader(delta) + if err != nil { + return err + } + + r := bufio.NewReader(decoder) + requiredFiles := make(map[string]bool) + + // Scan delta to collect all DeltaOpOpen operations + for { + op, err := r.ReadByte() + if err != nil { + if err == io.EOF { + break + } + return err + } + + size, err := binary.ReadUvarint(r) + if err != nil { + return err + } + + switch op { + case protocol.DeltaOpOpen: + nameBytes := make([]byte, size) + _, err = io.ReadFull(r, nameBytes) + if err != nil { + return err + } + requiredFiles[string(nameBytes)] = true + case protocol.DeltaOpData, protocol.DeltaOpAddData: + // Skip the data bytes + _, err = io.CopyN(io.Discard, r, int64(size)) + if err != nil { + return err + } + case protocol.DeltaOpCopy, protocol.DeltaOpSeek: + // These operations don't have additional data to skip + } + } + + // Close decoder before seeking to ensure no buffered data interferes + decoder.Close() + + // Validate all required files exist + for file := range requiredFiles { + cleanFile := protocol.CleanPath(file) + if len(cleanFile) == 0 { + continue // Skip invalid paths; Apply() will catch these + } + // Convert to native path separators for the current OS + nativePath := filepath.FromSlash(cleanFile) + filePath := filepath.Join(dataSource.basePath, nativePath) + if _, err := os.Stat(filePath); err != nil { + if os.IsNotExist(err) { + return fmt.Errorf("required source file does not exist: %s", cleanFile) + } + return fmt.Errorf("error accessing source file %s: %w", cleanFile, err) + } + } + + // Reset delta reader to the beginning for actual patching + _, err = delta.Seek(0, io.SeekStart) + return err +} + // Apply applies a binary patch from a delta reader to produce output using the data source. func Apply(delta io.Reader, dataSource DataSource, dst io.Writer) error { + // Validate required files if we have a seekable delta and FilesystemDataSource + if seeker, ok := delta.(io.ReadSeeker); ok { + if fsDataSource, ok := dataSource.(*FilesystemDataSource); ok { + if err := validateRequiredFiles(seeker, fsDataSource); err != nil { + return err + } + } + } + buf := make([]byte, len(protocol.DeltaHeader)) _, err := io.ReadFull(delta, buf) if err != nil { @@ -167,7 +255,7 @@ func Apply(delta io.Reader, dataSource DataSource, dst io.Writer) error { return err } - for i := uint64(0); i < size; i++ { + for i := range size { addBytes[i] += addBytes2[i] } if _, err := dst.Write(addBytes); err != nil { From 6ae47c4b4297850184fc7bc268238d6575a0b8cb Mon Sep 17 00:00:00 2001 From: Rosy-Glorious Miki Date: Thu, 4 Jun 2026 09:19:18 -0400 Subject: [PATCH 4/4] test: adds validation logging for Windows failure Signed-off-by: Rosy-Glorious Miki --- pkg/tar-diff/analysis.go | 15 ++++++++++++++ pkg/tar-diff/diff.go | 10 ++++++++++ pkg/tar-patch/apply.go | 42 ++++++++++++++++++++++++++++++++++++++-- tests/test-tar-errors.sh | 20 ++++++++++++++++++- 4 files changed, 84 insertions(+), 3 deletions(-) diff --git a/pkg/tar-diff/analysis.go b/pkg/tar-diff/analysis.go index d960757..7f81864 100644 --- a/pkg/tar-diff/analysis.go +++ b/pkg/tar-diff/analysis.go @@ -5,6 +5,7 @@ import ( "archive/tar" "crypto/sha1" "encoding/hex" + "fmt" "io" "log" "os" @@ -502,6 +503,20 @@ func analyzeForDelta(sources *SourceAnalysis, newTar *tarInfo, oldFiles []io.Rea options = NewOptions() } + // DEBUG: Log what files we found in source (old) tars + fmt.Fprintf(os.Stderr, "[DEBUG] analyzeForDelta: Found %d source files in old tar(s)\n", len(sources.sourceInfos)) + for i := range sources.sourceInfos { + s := &sources.sourceInfos[i] + fmt.Fprintf(os.Stderr, "[DEBUG] old: %q size=%d\n", s.file.paths[0], s.file.size) + } + + // DEBUG: Log what files we found in new tar + fmt.Fprintf(os.Stderr, "[DEBUG] analyzeForDelta: Found %d files in new tar\n", len(newTar.files)) + for i := range newTar.files { + file := &newTar.files[i] + fmt.Fprintf(os.Stderr, "[DEBUG] new: %q size=%d\n", file.paths[0], file.size) + } + targetInfos := make([]targetInfo, 0, len(newTar.files)+len(newTar.hardlinks)) sourceDataInfos := make(map[*sourceInfo]*sourceDataInfo) diff --git a/pkg/tar-diff/diff.go b/pkg/tar-diff/diff.go index bd6ff89..f0dd0cd 100644 --- a/pkg/tar-diff/diff.go +++ b/pkg/tar-diff/diff.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "log" + "os" "github.com/containers/image/v5/pkg/compression" ) @@ -159,15 +160,21 @@ func (g *deltaGenerator) generateForFile(info *targetInfo) error { maxBsdiffSize := g.options.maxBsdiffSize + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: file=%q newSize=%d oldSize=%d maxBsdiffSize=%d\n", + file.paths[0], file.size, sourceFile.size, maxBsdiffSize) + // For files that are smaller than the path to the delta source plus some small // space for the delta header, skip doing deltas, as delta data will be larger // than the content. if file.size <= int64(len(sourceFile.paths[0])+4) { + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: SKIPPED - file too small (size=%d <= pathLen+4=%d)\n", + file.size, len(sourceFile.paths[0])+4) return nil } switch { case sourceFile.sha1 == file.sha1 && sourceFile.size == file.size: + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using WriteOldFile (sha1 match)\n") // Reuse exact file from old tar if err := g.deltaWriter.WriteOldFile(info.source.sourcePath, uint64(sourceFile.size)); err != nil { return err @@ -177,16 +184,19 @@ func (g *deltaGenerator) generateForFile(info *targetInfo) error { return err } case maxBsdiffSize == 0 || (file.size < maxBsdiffSize && sourceFile.size < maxBsdiffSize): + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using BSDIFF (will call SetCurrentFile -> DeltaOpOpen)\n") // Use bsdiff to generate delta if err := g.generateForFileWithBsdiff(info); err != nil { return err } case info.rollsumMatches != nil && info.rollsumMatches.matchRatio > 20: + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using ROLLSUMS (will call SetCurrentFile -> DeltaOpOpen)\n") // Use rollsums to generate delta if err := g.generateForFileWithrollsums(info); err != nil { return err } default: + fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using COPYREST (inline data, NO DeltaOpOpen)\n") if err := g.copyRest(); err != nil { return err } diff --git a/pkg/tar-patch/apply.go b/pkg/tar-patch/apply.go index d545985..5cbf36c 100644 --- a/pkg/tar-patch/apply.go +++ b/pkg/tar-patch/apply.go @@ -89,18 +89,23 @@ func (f *FilesystemDataSource) Seek(offset int64, whence int) (int64, error) { // validateRequiredFiles scans the delta to collect all required source files and validates they exist. func validateRequiredFiles(delta io.ReadSeeker, dataSource *FilesystemDataSource) error { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: ENTRY - basePath=%q\n", dataSource.basePath) + // Read and validate header buf := make([]byte, len(protocol.DeltaHeader)) _, err := io.ReadFull(delta, buf) if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to read header: %v\n", err) return err } if !bytes.Equal(buf, protocol.DeltaHeader[:]) { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: invalid delta format\n") return fmt.Errorf("invalid delta format") } decoder, err := zstd.NewReader(delta) if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to create zstd decoder: %v\n", err) return err } @@ -108,32 +113,41 @@ func validateRequiredFiles(delta io.ReadSeeker, dataSource *FilesystemDataSource requiredFiles := make(map[string]bool) // Scan delta to collect all DeltaOpOpen operations + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: scanning delta for DeltaOpOpen operations\n") + opCount := 0 for { op, err := r.ReadByte() if err != nil { if err == io.EOF { break } + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading op byte: %v\n", err) return err } size, err := binary.ReadUvarint(r) if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading size: %v\n", err) return err } switch op { case protocol.DeltaOpOpen: + opCount++ nameBytes := make([]byte, size) _, err = io.ReadFull(r, nameBytes) if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading filename: %v\n", err) return err } - requiredFiles[string(nameBytes)] = true + filename := string(nameBytes) + requiredFiles[filename] = true + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: DeltaOpOpen #%d: %q\n", opCount, filename) case protocol.DeltaOpData, protocol.DeltaOpAddData: // Skip the data bytes _, err = io.CopyN(io.Discard, r, int64(size)) if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error skipping data: %v\n", err) return err } case protocol.DeltaOpCopy, protocol.DeltaOpSeek: @@ -144,25 +158,49 @@ func validateRequiredFiles(delta io.ReadSeeker, dataSource *FilesystemDataSource // Close decoder before seeking to ensure no buffered data interferes decoder.Close() + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: scan complete - found %d required files\n", len(requiredFiles)) + // Validate all required files exist for file := range requiredFiles { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: checking file=%q\n", file) + cleanFile := protocol.CleanPath(file) + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: after CleanPath=%q\n", cleanFile) + if len(cleanFile) == 0 { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: SKIPPED (cleanFile is empty)\n") continue // Skip invalid paths; Apply() will catch these } + // Convert to native path separators for the current OS nativePath := filepath.FromSlash(cleanFile) + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: after FromSlash=%q\n", nativePath) + filePath := filepath.Join(dataSource.basePath, nativePath) - if _, err := os.Stat(filePath); err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: full path=%q\n", filePath) + + fileInfo, err := os.Stat(filePath) + if err != nil { if os.IsNotExist(err) { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: FILE DOES NOT EXIST - returning error\n") return fmt.Errorf("required source file does not exist: %s", cleanFile) } + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: os.Stat error: %v\n", err) return fmt.Errorf("error accessing source file %s: %w", cleanFile, err) } + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: EXISTS - size=%d\n", fileInfo.Size()) } + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: all files validated successfully\n") + // Reset delta reader to the beginning for actual patching + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: seeking back to start\n") _, err = delta.Seek(0, io.SeekStart) + if err != nil { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: seek failed: %v\n", err) + } else { + fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: EXIT - success\n") + } return err } diff --git a/tests/test-tar-errors.sh b/tests/test-tar-errors.sh index f87f068..af92082 100755 --- a/tests/test-tar-errors.sh +++ b/tests/test-tar-errors.sh @@ -14,7 +14,7 @@ expect_fail () { local desc="$1" shift set +e - "$@" &>/dev/null + "$@" local code=$? set -e if [[ "$code" -eq 0 ]]; then @@ -61,13 +61,31 @@ expect_fail "tar-patch missing base dir" ./tar-patch "$TEST_DIR/bad-magic.tardif # Force bsdiff-sized payload + -max-bsdiff-size so the delta emits OPEN for data/only.txt; # otherwise copyRest-only deltas can apply without that file and expect_fail would flake. mkdir -p "$TEST_DIR/solo/data" "$TEST_DIR/solom/data" + +echo "=== DEBUG: Creating solo test files ===" >&2 head -c 4096 /dev/zero >"$TEST_DIR/solo/data/only.txt" +echo "=== DEBUG: Created solo/data/only.txt, size=$(stat -c%s "$TEST_DIR/solo/data/only.txt" 2>/dev/null || stat -f%z "$TEST_DIR/solo/data/only.txt" 2>/dev/null || wc -c < "$TEST_DIR/solo/data/only.txt")" >&2 + cp -a "$TEST_DIR/solo/data/only.txt" "$TEST_DIR/solom/data/only.txt" printf 'patched' | dd of="$TEST_DIR/solom/data/only.txt" bs=1 seek=2000 conv=notrunc status=none 2>/dev/null || \ printf 'patched' | dd of="$TEST_DIR/solom/data/only.txt" bs=1 seek=2000 conv=notrunc 2>/dev/null + +echo "=== DEBUG: Creating tar archives ===" >&2 create_tar "$TEST_DIR/solo-old.tar" "$TEST_DIR/solo" +echo "=== DEBUG: Created solo-old.tar.gz, size=$(stat -c%s "$TEST_DIR/solo-old.tar.gz" 2>/dev/null || stat -f%z "$TEST_DIR/solo-old.tar.gz" 2>/dev/null || wc -c < "$TEST_DIR/solo-old.tar.gz")" >&2 + create_tar "$TEST_DIR/solo-new.tar" "$TEST_DIR/solom" +echo "=== DEBUG: Created solo-new.tar.bz2, size=$(stat -c%s "$TEST_DIR/solo-new.tar.bz2" 2>/dev/null || stat -f%z "$TEST_DIR/solo-new.tar.bz2" 2>/dev/null || wc -c < "$TEST_DIR/solo-new.tar.bz2")" >&2 + +echo "=== DEBUG: Running tar-diff to create solo.tardiff ===" >&2 ./tar-diff -max-bsdiff-size 64 "$TEST_DIR/solo-old.tar.gz" "$TEST_DIR/solo-new.tar.bz2" "$TEST_DIR/solo.tardiff" +tardiff_exit=$? +echo "=== DEBUG: tar-diff exit code: $tardiff_exit ===" >&2 +if [[ -f "$TEST_DIR/solo.tardiff" ]]; then + echo "=== DEBUG: solo.tardiff created, size=$(stat -c%s "$TEST_DIR/solo.tardiff" 2>/dev/null || stat -f%z "$TEST_DIR/solo.tardiff" 2>/dev/null || wc -c < "$TEST_DIR/solo.tardiff")" >&2 +else + echo "=== DEBUG: solo.tardiff WAS NOT CREATED!" >&2 +fi rm -f "$TEST_DIR/solo/data/only.txt" if [[ -e "$TEST_DIR/solo/data/only.txt" ]]; then echo "expected solo/data/only.txt removed before tar-patch" >&2