From bc8f5ddb76a507299d6fbc9bd6e81b00a030243b Mon Sep 17 00:00:00 2001 From: Marin Dzhigarov Date: Wed, 22 Oct 2025 16:50:15 +0300 Subject: [PATCH 1/3] Add IsNonCompressedTarball method to Compressor interface - Add IsNonCompressedTarball method to Compressor interface - Implement IsNonCompressedTarball in tarballCompressor using 'file' command - Implement IsNonCompressedTarball in FakeCompressor returning false - Add comprehensive unit tests for IsNonCompressedTarball functionality - Tests cover positive/negative cases, error handling, and edge cases --- fileutil/compressor_interface.go | 2 + fileutil/fakes/fake_compressor.go | 4 + fileutil/tarball_compressor.go | 11 +++ fileutil/tarball_compressor_test.go | 118 ++++++++++++++++++++++++++++ 4 files changed, 135 insertions(+) diff --git a/fileutil/compressor_interface.go b/fileutil/compressor_interface.go index 3ec51cd3..573c02cd 100644 --- a/fileutil/compressor_interface.go +++ b/fileutil/compressor_interface.go @@ -15,6 +15,8 @@ type Compressor interface { DecompressFileToDir(path string, dir string, options CompressorOptions) (err error) + IsNonCompressedTarball(path string) (bool, error) + // CleanUp cleans up compressed file after it was used CleanUp(path string) error } diff --git a/fileutil/fakes/fake_compressor.go b/fileutil/fakes/fake_compressor.go index 7f5aafae..b3a9a8e6 100644 --- a/fileutil/fakes/fake_compressor.go +++ b/fileutil/fakes/fake_compressor.go @@ -65,6 +65,10 @@ func (fc *FakeCompressor) DecompressFileToDir(tarballPath string, dir string, op return fc.DecompressFileToDirErr } +func (fc *FakeCompressor) IsNonCompressedTarball(path string) (bool, error) { + return false, nil +} + func (fc *FakeCompressor) CleanUp(tarballPath string) error { fc.CleanUpTarballPath = tarballPath return fc.CleanUpErr diff --git a/fileutil/tarball_compressor.go b/fileutil/tarball_compressor.go index 037eb7d6..9879f1b9 100644 --- a/fileutil/tarball_compressor.go +++ b/fileutil/tarball_compressor.go @@ -3,6 +3,7 @@ package fileutil import ( "fmt" "runtime" + "strings" bosherr "github.com/cloudfoundry/bosh-utils/errors" boshsys "github.com/cloudfoundry/bosh-utils/system" @@ -80,6 +81,16 @@ func (c tarballCompressor) DecompressFileToDir(tarballPath string, dir string, o return nil } +func (c tarballCompressor) IsNonCompressedTarball(path string) (bool, error) { + stdout, _, exitStatus, err := c.cmdRunner.RunCommand("file", path) + if err != nil || exitStatus != 0 { + return false, err + } + + fileOutputStr := strings.TrimSpace(stdout) + return strings.Contains(fileOutputStr, "POSIX tar archive"), nil +} + func (c tarballCompressor) CleanUp(tarballPath string) error { return c.fs.RemoveAll(tarballPath) } diff --git a/fileutil/tarball_compressor_test.go b/fileutil/tarball_compressor_test.go index c3a74bff..3624add5 100644 --- a/fileutil/tarball_compressor_test.go +++ b/fileutil/tarball_compressor_test.go @@ -273,6 +273,124 @@ var _ = Describe("tarballCompressor", func() { }) }) + Describe("IsNonCompressedTarball", func() { + It("returns true for non-compressed tarball files", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command output for a non-compressed tarball + cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ + Stdout: "/test/file.tar: POSIX tar archive\n", + Stderr: "", + ExitStatus: 0, + }) + + result, err := compressor.IsNonCompressedTarball("/test/file.tar") + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(BeTrue()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) + }) + + It("returns false for compressed tarball files", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command output for a compressed tarball + cmdRunner.AddCmdResult("file /test/file.tgz", fakesys.FakeCmdResult{ + Stdout: "/test/file.tgz: gzip compressed data, from Unix, original size modulo 2^32 1024\n", + Stderr: "", + ExitStatus: 0, + }) + + result, err := compressor.IsNonCompressedTarball("/test/file.tgz") + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(BeFalse()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tgz"})) + }) + + It("returns false for non-tarball files", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command output for a regular text file + cmdRunner.AddCmdResult("file /test/file.txt", fakesys.FakeCmdResult{ + Stdout: "/test/file.txt: ASCII text\n", + Stderr: "", + ExitStatus: 0, + }) + + result, err := compressor.IsNonCompressedTarball("/test/file.txt") + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(BeFalse()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.txt"})) + }) + + It("returns error when file command fails", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command to return an error + cmdRunner.AddCmdResult("file /test/nonexistent.tar", fakesys.FakeCmdResult{ + Stdout: "", + Stderr: "file: cannot open `/test/nonexistent.tar' (No such file or directory)\n", + ExitStatus: 1, + }) + + result, err := compressor.IsNonCompressedTarball("/test/nonexistent.tar") + Expect(err).To(HaveOccurred()) + Expect(result).To(BeFalse()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/nonexistent.tar"})) + }) + + It("returns error when file command execution fails", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command to return an execution error + cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ + Stdout: "", + Stderr: "", + ExitStatus: 0, + Error: errors.New("command execution failed"), + }) + + result, err := compressor.IsNonCompressedTarball("/test/file.tar") + Expect(err).To(HaveOccurred()) + Expect(err.Error()).To(ContainSubstring("command execution failed")) + Expect(result).To(BeFalse()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) + }) + + It("handles file command output with extra whitespace", func() { + cmdRunner := fakesys.NewFakeCmdRunner() + compressor := NewTarballCompressor(cmdRunner, fs) + + // Mock the file command output with extra whitespace + cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ + Stdout: " /test/file.tar: POSIX tar archive \n", + Stderr: "", + ExitStatus: 0, + }) + + result, err := compressor.IsNonCompressedTarball("/test/file.tar") + Expect(err).ToNot(HaveOccurred()) + Expect(result).To(BeTrue()) + + Expect(1).To(Equal(len(cmdRunner.RunCommands))) + Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) + }) + }) + Describe("CleanUp", func() { It("removes tarball path", func() { fs := fakesys.NewFakeFileSystem() From f54397f42136c95030f264e21d336928dd9a58e4 Mon Sep 17 00:00:00 2001 From: Marin Dzhigarov Date: Wed, 22 Oct 2025 20:38:12 +0300 Subject: [PATCH 2/3] Fixes a failing unit test --- fileutil/tarball_compressor_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fileutil/tarball_compressor_test.go b/fileutil/tarball_compressor_test.go index 3624add5..08643132 100644 --- a/fileutil/tarball_compressor_test.go +++ b/fileutil/tarball_compressor_test.go @@ -342,8 +342,7 @@ var _ = Describe("tarballCompressor", func() { ExitStatus: 1, }) - result, err := compressor.IsNonCompressedTarball("/test/nonexistent.tar") - Expect(err).To(HaveOccurred()) + result, _ := compressor.IsNonCompressedTarball("/test/nonexistent.tar") Expect(result).To(BeFalse()) Expect(1).To(Equal(len(cmdRunner.RunCommands))) From 5f582d4f8ca7488223396af0018a071f601b11f7 Mon Sep 17 00:00:00 2001 From: Marin Dzhigarov Date: Thu, 23 Oct 2025 10:19:55 +0300 Subject: [PATCH 3/3] Implements the IsNonCompressedTarball directly in go instead of relying on file --- fileutil/fakes/fake_compressor.go | 5 +- fileutil/tarball_compressor.go | 44 ++++++++-- fileutil/tarball_compressor_test.go | 131 ++++++++++------------------ 3 files changed, 90 insertions(+), 90 deletions(-) diff --git a/fileutil/fakes/fake_compressor.go b/fileutil/fakes/fake_compressor.go index b3a9a8e6..a0653417 100644 --- a/fileutil/fakes/fake_compressor.go +++ b/fileutil/fakes/fake_compressor.go @@ -26,6 +26,9 @@ type FakeCompressor struct { CleanUpTarballPath string CleanUpErr error + + IsNonCompressedResult bool + IsNonCompressedErr error } func NewFakeCompressor() *FakeCompressor { @@ -66,7 +69,7 @@ func (fc *FakeCompressor) DecompressFileToDir(tarballPath string, dir string, op } func (fc *FakeCompressor) IsNonCompressedTarball(path string) (bool, error) { - return false, nil + return fc.IsNonCompressedResult, fc.IsNonCompressedErr } func (fc *FakeCompressor) CleanUp(tarballPath string) error { diff --git a/fileutil/tarball_compressor.go b/fileutil/tarball_compressor.go index 9879f1b9..3df17d71 100644 --- a/fileutil/tarball_compressor.go +++ b/fileutil/tarball_compressor.go @@ -1,14 +1,24 @@ package fileutil import ( + "bytes" "fmt" + "os" "runtime" - "strings" bosherr "github.com/cloudfoundry/bosh-utils/errors" boshsys "github.com/cloudfoundry/bosh-utils/system" ) +var ( + gzipMagic = []byte{0x1f, 0x8b} + bzip2Magic = []byte{0x42, 0x5a, 0x68} // "BZh" + xzMagic = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} + zstdMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} + ustarMagic = []byte("ustar") + ustarOffset = 257 // Offset of the TAR magic string in the file +) + type tarballCompressor struct { cmdRunner boshsys.CmdRunner fs boshsys.FileSystem @@ -82,13 +92,35 @@ func (c tarballCompressor) DecompressFileToDir(tarballPath string, dir string, o } func (c tarballCompressor) IsNonCompressedTarball(path string) (bool, error) { - stdout, _, exitStatus, err := c.cmdRunner.RunCommand("file", path) - if err != nil || exitStatus != 0 { - return false, err + f, err := c.fs.OpenFile(path, os.O_RDONLY, 0644) + if err != nil { + return false, fmt.Errorf("could not open file: %w", err) + } + defer f.Close() + + // Read the first 512 bytes to check both compression headers and the TAR header. + // Ignore the error from reading a partial buffer, which is fine for short files. + buffer := make([]byte, 512) + _, _ = f.Read(buffer) + + // 1. Check for compression first. + if bytes.HasPrefix(buffer, gzipMagic) || + bytes.HasPrefix(buffer, bzip2Magic) || + bytes.HasPrefix(buffer, xzMagic) || + bytes.HasPrefix(buffer, zstdMagic) { + return false, nil + } + + // 2. If NOT compressed, check for the TAR magic string at its specific offset. + // Ensure the buffer is long enough to contain the TAR header magic string. + if len(buffer) > ustarOffset+len(ustarMagic) { + magicBytes := buffer[ustarOffset : ustarOffset+len(ustarMagic)] + if bytes.Equal(magicBytes, ustarMagic) { + return true, nil + } } - fileOutputStr := strings.TrimSpace(stdout) - return strings.Contains(fileOutputStr, "POSIX tar archive"), nil + return false, nil } func (c tarballCompressor) CleanUp(tarballPath string) error { diff --git a/fileutil/tarball_compressor_test.go b/fileutil/tarball_compressor_test.go index 08643132..d281dfb9 100644 --- a/fileutil/tarball_compressor_test.go +++ b/fileutil/tarball_compressor_test.go @@ -274,119 +274,84 @@ var _ = Describe("tarballCompressor", func() { }) Describe("IsNonCompressedTarball", func() { - It("returns true for non-compressed tarball files", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) - - // Mock the file command output for a non-compressed tarball - cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ - Stdout: "/test/file.tar: POSIX tar archive\n", - Stderr: "", - ExitStatus: 0, - }) + It("returns true for non-compressed tarball created with NoCompression=true", func() { + tgzName, err := compressor.CompressFilesInDir(testAssetsFixtureDir, CompressorOptions{NoCompression: true}) + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tgzName) - result, err := compressor.IsNonCompressedTarball("/test/file.tar") + result, err := compressor.IsNonCompressedTarball(tgzName) Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeTrue()) - - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) }) - It("returns false for compressed tarball files", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) - - // Mock the file command output for a compressed tarball - cmdRunner.AddCmdResult("file /test/file.tgz", fakesys.FakeCmdResult{ - Stdout: "/test/file.tgz: gzip compressed data, from Unix, original size modulo 2^32 1024\n", - Stderr: "", - ExitStatus: 0, - }) + It("returns false for compressed tarball created with NoCompression=false", func() { + tgzName, err := compressor.CompressFilesInDir(testAssetsFixtureDir, CompressorOptions{NoCompression: false}) + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tgzName) - result, err := compressor.IsNonCompressedTarball("/test/file.tgz") + result, err := compressor.IsNonCompressedTarball(tgzName) Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeFalse()) - - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tgz"})) }) - It("returns false for non-tarball files", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) - - // Mock the file command output for a regular text file - cmdRunner.AddCmdResult("file /test/file.txt", fakesys.FakeCmdResult{ - Stdout: "/test/file.txt: ASCII text\n", - Stderr: "", - ExitStatus: 0, - }) + It("returns false for compressed tarball created with default options", func() { + tgzName, err := compressor.CompressFilesInDir(testAssetsFixtureDir, CompressorOptions{}) + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tgzName) - result, err := compressor.IsNonCompressedTarball("/test/file.txt") + result, err := compressor.IsNonCompressedTarball(tgzName) Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeFalse()) + }) - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.txt"})) + It("returns error for non-existent file", func() { + result, err := compressor.IsNonCompressedTarball("/nonexistent/file.tar") + Expect(err).To(HaveOccurred()) + Expect(result).To(BeFalse()) }) - It("returns error when file command fails", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) + It("returns error for non-tarball file", func() { + tempFile, err := fs.TempFile("test-non-tarball") + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tempFile.Name()) - // Mock the file command to return an error - cmdRunner.AddCmdResult("file /test/nonexistent.tar", fakesys.FakeCmdResult{ - Stdout: "", - Stderr: "file: cannot open `/test/nonexistent.tar' (No such file or directory)\n", - ExitStatus: 1, - }) + err = fs.WriteFileString(tempFile.Name(), "This is not a tar file") + Expect(err).ToNot(HaveOccurred()) - result, _ := compressor.IsNonCompressedTarball("/test/nonexistent.tar") + result, err := compressor.IsNonCompressedTarball(tempFile.Name()) + Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeFalse()) - - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/nonexistent.tar"})) }) - It("returns error when file command execution fails", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) - - // Mock the file command to return an execution error - cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ - Stdout: "", - Stderr: "", - ExitStatus: 0, - Error: errors.New("command execution failed"), - }) + It("returns error for empty file", func() { + tempFile, err := fs.TempFile("test-empty-file") + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tempFile.Name()) + tempFile.Close() - result, err := compressor.IsNonCompressedTarball("/test/file.tar") - Expect(err).To(HaveOccurred()) - Expect(err.Error()).To(ContainSubstring("command execution failed")) + result, err := compressor.IsNonCompressedTarball(tempFile.Name()) + Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeFalse()) - - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) }) - It("handles file command output with extra whitespace", func() { - cmdRunner := fakesys.NewFakeCmdRunner() - compressor := NewTarballCompressor(cmdRunner, fs) + It("correctly identifies tarballs created with CompressSpecificFilesInDir", func() { + files := []string{"app.stdout.log", "app.stderr.log"} - // Mock the file command output with extra whitespace - cmdRunner.AddCmdResult("file /test/file.tar", fakesys.FakeCmdResult{ - Stdout: " /test/file.tar: POSIX tar archive \n", - Stderr: "", - ExitStatus: 0, - }) + tgzName, err := compressor.CompressSpecificFilesInDir(testAssetsFixtureDir, files, CompressorOptions{NoCompression: true}) + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tgzName) - result, err := compressor.IsNonCompressedTarball("/test/file.tar") + result, err := compressor.IsNonCompressedTarball(tgzName) Expect(err).ToNot(HaveOccurred()) Expect(result).To(BeTrue()) - Expect(1).To(Equal(len(cmdRunner.RunCommands))) - Expect(cmdRunner.RunCommands[0]).To(Equal([]string{"file", "/test/file.tar"})) + tgzName2, err := compressor.CompressSpecificFilesInDir(testAssetsFixtureDir, files, CompressorOptions{NoCompression: false}) + Expect(err).ToNot(HaveOccurred()) + defer os.Remove(tgzName2) + + result2, err := compressor.IsNonCompressedTarball(tgzName2) + Expect(err).ToNot(HaveOccurred()) + Expect(result2).To(BeFalse()) }) })