Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ macos-latest, macos-26-intel ]
os: [ macos-latest]
steps:
- name: Check out repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
Expand All @@ -116,7 +116,7 @@ jobs:
CGO_ENABLED: 0
strategy:
matrix:
os: [ windows-latest, windows-11-arm ]
os: [ windows-latest]
steps:
- name: Check out repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
[![CI](https://github.com/containers/tar-diff/actions/workflows/ci.yml/badge.svg)](https://github.com/containers/tar-diff/actions/workflows/ci.yml)
[![Go Version](https://img.shields.io/badge/go-1.25-blue.svg)](https://golang.org/dl/)
[![Go Version](https://img.shields.io/badge/go-1.26-blue.svg)](https://golang.org/dl/)
[![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](LICENSE)
[![codecov](https://codecov.io/gh/containers/tar-diff/graph/badge.svg)](https://codecov.io/gh/containers/tar-diff)

Expand Down
2 changes: 0 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ module github.com/containers/tar-diff

go 1.26

toolchain go1.26.2

require (
github.com/containers/image/v5 v5.36.2
github.com/klauspost/compress v1.18.5
Expand Down
15 changes: 15 additions & 0 deletions pkg/tar-diff/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"archive/tar"
"crypto/sha1"
"encoding/hex"
"fmt"
"io"
"log"
"os"
Expand Down Expand Up @@ -502,6 +503,20 @@ func analyzeForDelta(sources *SourceAnalysis, newTar *tarInfo, oldFiles []io.Rea
options = NewOptions()
}

// DEBUG: Log what files we found in source (old) tars
fmt.Fprintf(os.Stderr, "[DEBUG] analyzeForDelta: Found %d source files in old tar(s)\n", len(sources.sourceInfos))
for i := range sources.sourceInfos {
s := &sources.sourceInfos[i]
fmt.Fprintf(os.Stderr, "[DEBUG] old: %q size=%d\n", s.file.paths[0], s.file.size)
}

// DEBUG: Log what files we found in new tar
fmt.Fprintf(os.Stderr, "[DEBUG] analyzeForDelta: Found %d files in new tar\n", len(newTar.files))
for i := range newTar.files {
file := &newTar.files[i]
fmt.Fprintf(os.Stderr, "[DEBUG] new: %q size=%d\n", file.paths[0], file.size)
}

targetInfos := make([]targetInfo, 0, len(newTar.files)+len(newTar.hardlinks))
sourceDataInfos := make(map[*sourceInfo]*sourceDataInfo)

Expand Down
10 changes: 10 additions & 0 deletions pkg/tar-diff/diff.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"fmt"
"io"
"log"
"os"

"github.com/containers/image/v5/pkg/compression"
)
Expand Down Expand Up @@ -159,15 +160,21 @@ func (g *deltaGenerator) generateForFile(info *targetInfo) error {

maxBsdiffSize := g.options.maxBsdiffSize

fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: file=%q newSize=%d oldSize=%d maxBsdiffSize=%d\n",
file.paths[0], file.size, sourceFile.size, maxBsdiffSize)

// For files that are smaller than the path to the delta source plus some small
// space for the delta header, skip doing deltas, as delta data will be larger
// than the content.
if file.size <= int64(len(sourceFile.paths[0])+4) {
fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: SKIPPED - file too small (size=%d <= pathLen+4=%d)\n",
file.size, len(sourceFile.paths[0])+4)
return nil
}

switch {
case sourceFile.sha1 == file.sha1 && sourceFile.size == file.size:
fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using WriteOldFile (sha1 match)\n")
// Reuse exact file from old tar
if err := g.deltaWriter.WriteOldFile(info.source.sourcePath, uint64(sourceFile.size)); err != nil {
return err
Expand All @@ -177,16 +184,19 @@ func (g *deltaGenerator) generateForFile(info *targetInfo) error {
return err
}
case maxBsdiffSize == 0 || (file.size < maxBsdiffSize && sourceFile.size < maxBsdiffSize):
fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using BSDIFF (will call SetCurrentFile -> DeltaOpOpen)\n")
// Use bsdiff to generate delta
if err := g.generateForFileWithBsdiff(info); err != nil {
return err
}
case info.rollsumMatches != nil && info.rollsumMatches.matchRatio > 20:
fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using ROLLSUMS (will call SetCurrentFile -> DeltaOpOpen)\n")
// Use rollsums to generate delta
if err := g.generateForFileWithrollsums(info); err != nil {
return err
}
default:
fmt.Fprintf(os.Stderr, "[DEBUG] generateForFile: using COPYREST (inline data, NO DeltaOpOpen)\n")
if err := g.copyRest(); err != nil {
return err
}
Expand Down
130 changes: 128 additions & 2 deletions pkg/tar-patch/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func (f *FilesystemDataSource) SetCurrentFile(file string) error {
err := f.currentFile.Close()
f.currentFile = nil
if err != nil {
return nil
return err
}
}
currentFile, err := os.Open(filepath.Join(f.basePath, file))
Expand All @@ -87,8 +87,134 @@ func (f *FilesystemDataSource) Seek(offset int64, whence int) (int64, error) {
return f.currentFile.Seek(offset, whence)
}

// validateRequiredFiles scans the delta to collect all required source files and validates they exist.
func validateRequiredFiles(delta io.ReadSeeker, dataSource *FilesystemDataSource) error {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: ENTRY - basePath=%q\n", dataSource.basePath)

// Read and validate header
buf := make([]byte, len(protocol.DeltaHeader))
_, err := io.ReadFull(delta, buf)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to read header: %v\n", err)
return err
}
if !bytes.Equal(buf, protocol.DeltaHeader[:]) {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: invalid delta format\n")
return fmt.Errorf("invalid delta format")
}

decoder, err := zstd.NewReader(delta)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to create zstd decoder: %v\n", err)
return err
}
Comment on lines +106 to +110

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

The zstd.Reader is not closed if an error occurs during the delta scanning loop, leading to a resource leak. Deferring decoder.Close() immediately after its successful creation ensures that resources are always released, even on error paths. Note that the manual call to decoder.Close() later in the function can then be removed.

Suggested change
decoder, err := zstd.NewReader(delta)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to create zstd decoder: %v\n", err)
return err
}
decoder, err := zstd.NewReader(delta)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: failed to create zstd decoder: %v\n", err)
return err
}
defer decoder.Close()


r := bufio.NewReader(decoder)
requiredFiles := make(map[string]bool)

// Scan delta to collect all DeltaOpOpen operations
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: scanning delta for DeltaOpOpen operations\n")
opCount := 0
for {
op, err := r.ReadByte()
if err != nil {
if err == io.EOF {
break
}
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading op byte: %v\n", err)
return err
}

size, err := binary.ReadUvarint(r)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading size: %v\n", err)
return err
}

switch op {
case protocol.DeltaOpOpen:
opCount++
nameBytes := make([]byte, size)
Comment on lines +135 to +137

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

security-high high

The size of the filename is read directly from the delta file and used to allocate nameBytes without validation. A maliciously crafted delta file with an extremely large size could cause an out-of-memory (OOM) panic, leading to a Denial of Service (DoS). Please validate size against maxFilenameSize before allocating memory, similar to the check performed in Apply.

case protocol.DeltaOpOpen:
			opCount++
			if size > maxFilenameSize {
				return fmt.Errorf("filename size %d exceeds maximum allowed %d", size, maxFilenameSize)
			}
			nameBytes := make([]byte, size)

_, err = io.ReadFull(r, nameBytes)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error reading filename: %v\n", err)
return err
}
filename := string(nameBytes)
requiredFiles[filename] = true
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: DeltaOpOpen #%d: %q\n", opCount, filename)
case protocol.DeltaOpData, protocol.DeltaOpAddData:
// Skip the data bytes
_, err = io.CopyN(io.Discard, r, int64(size))
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: error skipping data: %v\n", err)
return err
}
case protocol.DeltaOpCopy, protocol.DeltaOpSeek:
// These operations don't have additional data to skip
}
}

// Close decoder before seeking to ensure no buffered data interferes
decoder.Close()

fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: scan complete - found %d required files\n", len(requiredFiles))

// Validate all required files exist
for file := range requiredFiles {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: checking file=%q\n", file)

cleanFile := protocol.CleanPath(file)
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: after CleanPath=%q\n", cleanFile)

if len(cleanFile) == 0 {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: SKIPPED (cleanFile is empty)\n")
continue // Skip invalid paths; Apply() will catch these
}

// Convert to native path separators for the current OS
nativePath := filepath.FromSlash(cleanFile)
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: after FromSlash=%q\n", nativePath)

filePath := filepath.Join(dataSource.basePath, nativePath)
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: full path=%q\n", filePath)

fileInfo, err := os.Stat(filePath)
if err != nil {
if os.IsNotExist(err) {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: FILE DOES NOT EXIST - returning error\n")
return fmt.Errorf("required source file does not exist: %s", cleanFile)
}
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: os.Stat error: %v\n", err)
return fmt.Errorf("error accessing source file %s: %w", cleanFile, err)
}
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: EXISTS - size=%d\n", fileInfo.Size())
}

fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: all files validated successfully\n")

// Reset delta reader to the beginning for actual patching
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: seeking back to start\n")
_, err = delta.Seek(0, io.SeekStart)
if err != nil {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: seek failed: %v\n", err)
} else {
fmt.Fprintf(os.Stderr, "[DEBUG] validateRequiredFiles: EXIT - success\n")
}
return err
}

// Apply applies a binary patch from a delta reader to produce output using the data source.
func Apply(delta io.Reader, dataSource DataSource, dst io.Writer) error {
// Validate required files if we have a seekable delta and FilesystemDataSource
if seeker, ok := delta.(io.ReadSeeker); ok {
if fsDataSource, ok := dataSource.(*FilesystemDataSource); ok {
if err := validateRequiredFiles(seeker, fsDataSource); err != nil {
return err
}
}
}
Comment on lines +210 to +216

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Calling validateRequiredFiles decompresses and scans the entire delta file to check for required files, and then Apply decompresses and scans it a second time to perform the actual patching. For large delta files, decompressing the zstd stream twice will double the CPU and I/O overhead. Consider if this pre-validation is strictly necessary, or if validation can be performed on-the-fly during the main patch application to avoid the performance penalty.


buf := make([]byte, len(protocol.DeltaHeader))
_, err := io.ReadFull(delta, buf)
if err != nil {
Expand Down Expand Up @@ -167,7 +293,7 @@ func Apply(delta io.Reader, dataSource DataSource, dst io.Writer) error {
return err
}

for i := uint64(0); i < size; i++ {
for i := range size {
addBytes[i] += addBytes2[i]
}
if _, err := dst.Write(addBytes); err != nil {
Expand Down
20 changes: 19 additions & 1 deletion tests/test-tar-errors.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ expect_fail () {
local desc="$1"
shift
set +e
"$@" &>/dev/null
"$@"
local code=$?
set -e
if [[ "$code" -eq 0 ]]; then
Expand Down Expand Up @@ -61,13 +61,31 @@ expect_fail "tar-patch missing base dir" ./tar-patch "$TEST_DIR/bad-magic.tardif
# Force bsdiff-sized payload + -max-bsdiff-size so the delta emits OPEN for data/only.txt;
# otherwise copyRest-only deltas can apply without that file and expect_fail would flake.
mkdir -p "$TEST_DIR/solo/data" "$TEST_DIR/solom/data"

echo "=== DEBUG: Creating solo test files ===" >&2
head -c 4096 /dev/zero >"$TEST_DIR/solo/data/only.txt"
echo "=== DEBUG: Created solo/data/only.txt, size=$(stat -c%s "$TEST_DIR/solo/data/only.txt" 2>/dev/null || stat -f%z "$TEST_DIR/solo/data/only.txt" 2>/dev/null || wc -c < "$TEST_DIR/solo/data/only.txt")" >&2

cp -a "$TEST_DIR/solo/data/only.txt" "$TEST_DIR/solom/data/only.txt"
printf 'patched' | dd of="$TEST_DIR/solom/data/only.txt" bs=1 seek=2000 conv=notrunc status=none 2>/dev/null || \
printf 'patched' | dd of="$TEST_DIR/solom/data/only.txt" bs=1 seek=2000 conv=notrunc 2>/dev/null

echo "=== DEBUG: Creating tar archives ===" >&2
create_tar "$TEST_DIR/solo-old.tar" "$TEST_DIR/solo"
echo "=== DEBUG: Created solo-old.tar.gz, size=$(stat -c%s "$TEST_DIR/solo-old.tar.gz" 2>/dev/null || stat -f%z "$TEST_DIR/solo-old.tar.gz" 2>/dev/null || wc -c < "$TEST_DIR/solo-old.tar.gz")" >&2

create_tar "$TEST_DIR/solo-new.tar" "$TEST_DIR/solom"
echo "=== DEBUG: Created solo-new.tar.bz2, size=$(stat -c%s "$TEST_DIR/solo-new.tar.bz2" 2>/dev/null || stat -f%z "$TEST_DIR/solo-new.tar.bz2" 2>/dev/null || wc -c < "$TEST_DIR/solo-new.tar.bz2")" >&2

echo "=== DEBUG: Running tar-diff to create solo.tardiff ===" >&2
./tar-diff -max-bsdiff-size 64 "$TEST_DIR/solo-old.tar.gz" "$TEST_DIR/solo-new.tar.bz2" "$TEST_DIR/solo.tardiff"
tardiff_exit=$?
echo "=== DEBUG: tar-diff exit code: $tardiff_exit ===" >&2
if [[ -f "$TEST_DIR/solo.tardiff" ]]; then
echo "=== DEBUG: solo.tardiff created, size=$(stat -c%s "$TEST_DIR/solo.tardiff" 2>/dev/null || stat -f%z "$TEST_DIR/solo.tardiff" 2>/dev/null || wc -c < "$TEST_DIR/solo.tardiff")" >&2
else
echo "=== DEBUG: solo.tardiff WAS NOT CREATED!" >&2
fi
rm -f "$TEST_DIR/solo/data/only.txt"
if [[ -e "$TEST_DIR/solo/data/only.txt" ]]; then
echo "expected solo/data/only.txt removed before tar-patch" >&2
Expand Down
Loading