From 715649255153d3cbe8bba005ad5d427a8aa8daea Mon Sep 17 00:00:00 2001 From: Danny Canter Date: Sun, 11 Jan 2026 11:57:32 -0800 Subject: [PATCH] _ContainerizationTar: Add new native tar reader/writer Swift does not have a native tar reader/writer, and we'd really like to avoid linking more (libarchive) libraries to vminitd if we can get away with it. For copying directories into/out of containers, tar is fairly nice as it's a simple way to preserve everything you need to be able to reassemble the directory on the receiving end. Due to this, I decided to write a somewhat simple tar reader/writer solely for this purpose. Unfortunately, there's quite a lot of work to get vminitd/ to compile on macOS without the static Linux SDK, so it was making unit testing these additions quite a pain, so for now the new work lives in _ContainerizationTar (where the underscore is trying to denote that this is rather experimental). This change aims to add a simple tar reader and writer with support for pax extended headers (for long file names and > 8 GiB files). 
Because its intended purpose is in a scenario where we own both the creator and ingestor, the reader does NOT handle every case, but it is good at unarchiving the archives the library has made :) --- Package.resolved | 6 +- Package.swift | 21 +- Sources/ContainerizationTar/TarHeader.swift | 542 ++++++++ Sources/ContainerizationTar/TarPax.swift | 328 +++++ Sources/ContainerizationTar/TarReader.swift | 340 +++++ Sources/ContainerizationTar/TarWriter.swift | 481 +++++++ Tests/ContainerizationTarTests/TarTests.swift | 1236 +++++++++++++++++ vminitd/Package.resolved | 6 +- vminitd/Package.swift | 1 + 9 files changed, 2954 insertions(+), 7 deletions(-) create mode 100644 Sources/ContainerizationTar/TarHeader.swift create mode 100644 Sources/ContainerizationTar/TarPax.swift create mode 100644 Sources/ContainerizationTar/TarReader.swift create mode 100644 Sources/ContainerizationTar/TarWriter.swift create mode 100644 Tests/ContainerizationTarTests/TarTests.swift diff --git a/Package.resolved b/Package.resolved index abb229e3..50a80a06 100644 --- a/Package.resolved +++ b/Package.resolved @@ -1,5 +1,5 @@ { - "originHash" : "c82be4e21117351bb3f942869ce90d35dcd0dd0223dc1c49ce7a56b52709e836", + "originHash" : "e5fa0e8b0e9dab4b79c924cd2c585e41bb516d58ef0af8f1b3a1d1a4a7d9810d", "pins" : [ { "identity" : "async-http-client", @@ -141,8 +141,8 @@ "kind" : "remoteSourceControl", "location" : "https://github.com/apple/swift-nio.git", "state" : { - "revision" : "34d486b01cd891297ac615e40d5999536a1e138d", - "version" : "2.83.0" + "revision" : "4a9a97111099376854a7f8f0f9f88b9d61f52eff", + "version" : "2.92.2" } }, { diff --git a/Package.swift b/Package.swift index 7006474c..792701dc 100644 --- a/Package.swift +++ b/Package.swift @@ -33,6 +33,7 @@ let package = Package( .library(name: "ContainerizationOS", targets: ["ContainerizationOS"]), .library(name: "ContainerizationExtras", targets: ["ContainerizationExtras"]), .library(name: "ContainerizationArchive", targets: 
["ContainerizationArchive"]), + .library(name: "_ContainerizationTar", targets: ["_ContainerizationTar"]), .executable(name: "cctl", targets: ["cctl"]), ], dependencies: [ @@ -42,7 +43,7 @@ let package = Package( .package(url: "https://github.com/apple/swift-crypto.git", from: "3.0.0"), .package(url: "https://github.com/grpc/grpc-swift.git", from: "1.26.0"), .package(url: "https://github.com/apple/swift-protobuf.git", from: "1.29.0"), - .package(url: "https://github.com/apple/swift-nio.git", from: "2.80.0"), + .package(url: "https://github.com/apple/swift-nio.git", from: "2.92.2"), .package(url: "https://github.com/swift-server/async-http-client.git", from: "1.20.1"), .package(url: "https://github.com/apple/swift-system.git", from: "1.4.0"), .package(url: "https://github.com/swiftlang/swift-docc-plugin", from: "1.1.0"), @@ -246,5 +247,23 @@ let package = Package( .target( name: "CShim" ), + .target( + name: "_ContainerizationTar", + dependencies: [ + .product(name: "SystemPackage", package: "swift-system"), + .product(name: "NIOCore", package: "swift-nio"), + .product(name: "_NIOFileSystem", package: "swift-nio"), + ], + path: "Sources/ContainerizationTar" + ), + .testTarget( + name: "ContainerizationTarTests", + dependencies: [ + "_ContainerizationTar", + .product(name: "SystemPackage", package: "swift-system"), + .product(name: "NIOCore", package: "swift-nio"), + .product(name: "_NIOFileSystem", package: "swift-nio"), + ] + ), ] ) diff --git a/Sources/ContainerizationTar/TarHeader.swift b/Sources/ContainerizationTar/TarHeader.swift new file mode 100644 index 00000000..a3f77219 --- /dev/null +++ b/Sources/ContainerizationTar/TarHeader.swift @@ -0,0 +1,542 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +/// TAR archive constants and header structure. +/// +/// TAR header format (POSIX ustar): +/// ``` +/// Offset Size Field +/// 0 100 File name +/// 100 8 File mode (octal) +/// 108 8 Owner UID (octal) +/// 116 8 Owner GID (octal) +/// 124 12 File size (octal) +/// 136 12 Modification time (octal) +/// 148 8 Checksum +/// 156 1 Type flag +/// 157 100 Link name +/// 257 6 Magic ("ustar\0") +/// 263 2 Version ("00") +/// 265 32 Owner user name +/// 297 32 Owner group name +/// 329 8 Device major number +/// 337 8 Device minor number +/// 345 155 Filename prefix +/// 500 12 Padding (zeros) +/// ``` +enum TarConstants { + /// Size of a TAR block in bytes. + static let blockSize = 512 + + /// USTAR magic string. + static let magic: [UInt8] = [0x75, 0x73, 0x74, 0x61, 0x72, 0x00] // "ustar\0" + + /// USTAR version. + static let version: [UInt8] = [0x30, 0x30] // "00" + + /// Maximum file size representable in traditional TAR (11 octal digits). + /// 8,589,934,591 bytes (~8GB) + static let maxTraditionalSize: Int64 = 0o77777777777 + + /// Maximum path length in traditional TAR name field. + static let maxNameLength = 100 + + /// Maximum path length using prefix field. + static let maxPrefixLength = 155 + + /// PAX header name used when writing extended headers. + static let paxHeaderName = "././@PaxHeader" + + /// Maximum size for PAX extended header data (1MB). + static let maxPaxSize = 1024 * 1024 +} + +/// TAR entry type flags. 
+public enum TarEntryType: UInt8, Sendable { + /// Regular file (or '\0' for old TAR). + case regular = 0x30 // '0' + + /// Hard link. + case hardLink = 0x31 // '1' + + /// Symbolic link. + case symbolicLink = 0x32 // '2' + + /// Character device. + case characterDevice = 0x33 // '3' + + /// Block device. + case blockDevice = 0x34 // '4' + + /// Directory. + case directory = 0x35 // '5' + + /// FIFO (named pipe). + case fifo = 0x36 // '6' + + /// Contiguous file. + case contiguous = 0x37 // '7' + + /// PAX extended header (per-file). + case paxExtended = 0x78 // 'x' + + /// PAX global extended header. + case paxGlobal = 0x67 // 'g' + + /// Null byte (old TAR regular file). + case regularAlt = 0x00 + + /// Whether this entry type represents a regular file. + public var isRegularFile: Bool { + self == .regular || self == .regularAlt + } +} + +/// Header field offsets and sizes. +enum TarHeaderField { + static let nameOffset = 0 + static let nameSize = 100 + + static let modeOffset = 100 + static let modeSize = 8 + + static let uidOffset = 108 + static let uidSize = 8 + + static let gidOffset = 116 + static let gidSize = 8 + + static let sizeOffset = 124 + static let sizeSize = 12 + + static let mtimeOffset = 136 + static let mtimeSize = 12 + + static let checksumOffset = 148 + static let checksumSize = 8 + + static let typeFlagOffset = 156 + static let typeFlagSize = 1 + + static let linkNameOffset = 157 + static let linkNameSize = 100 + + static let magicOffset = 257 + static let magicSize = 6 + + static let versionOffset = 263 + static let versionSize = 2 + + static let unameOffset = 265 + static let unameSize = 32 + + static let gnameOffset = 297 + static let gnameSize = 32 + + static let devMajorOffset = 329 + static let devMajorSize = 8 + + static let devMinorOffset = 337 + static let devMinorSize = 8 + + static let prefixOffset = 345 + static let prefixSize = 155 +} + +/// Represents a parsed TAR header. 
+public struct TarHeader: Sendable { + /// File path (may come from PAX extended header). + public var path: String + + /// File mode/permissions. + public var mode: UInt32 + + /// Owner user ID. + public var uid: UInt32 + + /// Owner group ID. + public var gid: UInt32 + + /// Content size in bytes. For regular files this is the file data size. + /// For PAX headers this is the size of the metadata records. + public var size: Int64 + + /// Modification time (Unix timestamp). + public var mtime: Int64 + + /// Entry type. + public var entryType: TarEntryType + + /// Link target (for symbolic/hard links). + public var linkName: String + + /// Owner user name. + public var userName: String + + /// Owner group name. + public var groupName: String + + /// Device major number. + public var deviceMajor: UInt32 + + /// Device minor number. + public var deviceMinor: UInt32 + + public init( + path: String, + mode: UInt32 = 0o644, + uid: UInt32 = 0, + gid: UInt32 = 0, + size: Int64 = 0, + mtime: Int64 = 0, + entryType: TarEntryType = .regular, + linkName: String = "", + userName: String = "root", + groupName: String = "root", + deviceMajor: UInt32 = 0, + deviceMinor: UInt32 = 0 + ) { + self.path = path + self.mode = mode + self.uid = uid + self.gid = gid + self.size = size + self.mtime = mtime + self.entryType = entryType + self.linkName = linkName + self.userName = userName + self.groupName = groupName + self.deviceMajor = deviceMajor + self.deviceMinor = deviceMinor + } +} + +// MARK: - Octal String Conversion + +extension TarHeader { + /// Convert an integer to an octal string with the specified width. + /// The string is null-terminated and right-padded with spaces if needed. 
+ static func formatOctal(_ value: Int64, width: Int) -> [UInt8] { + var result = [UInt8](repeating: 0, count: width) + + // Format as octal string (width - 1 digits to leave room for null terminator) + let octalString = String(value, radix: 8) + let paddedString = String(repeating: "0", count: max(0, width - 1 - octalString.count)) + octalString + + // Copy to result buffer + let bytes = Array(paddedString.utf8) + let copyCount = min(bytes.count, width - 1) + for i in 0..) -> Int64 { + // Check for GNU binary extension (high bit set) + if let first = bytes.first, first & 0x80 != 0 { + // Binary format: remaining bytes are big-endian integer + var value: Int64 = 0 + for (index, byte) in bytes.enumerated() { + let b = index == 0 ? byte & 0x7F : byte // Clear high bit on first byte + value = (value << 8) | Int64(b) + } + return value + } + + // Standard octal ASCII format + var value: Int64 = 0 + for byte in bytes { + // Skip leading spaces and stop at null/space terminator + if byte == 0x20 { // space + if value == 0 { continue } // leading space + break // trailing space + } + if byte == 0x00 { break } // null terminator + + // Convert ASCII digit to value + if byte >= 0x30 && byte <= 0x37 { // '0' to '7' + value = value * 8 + Int64(byte - 0x30) + } + } + return value + } + + /// Parse a null-terminated string from a TAR header field. + static func parseString(_ bytes: ArraySlice) -> String { + // Find null terminator or end of slice + var endIndex = bytes.startIndex + for i in bytes.indices { + if bytes[i] == 0 { + break + } + endIndex = i + 1 + } + + let stringBytes = bytes[bytes.startIndex.. [UInt8]? 
{ + var header = [UInt8](repeating: 0, count: TarConstants.blockSize) + + // Determine if we can fit the path in traditional format + let pathBytes = Array(path.utf8) + if pathBytes.count > TarConstants.maxNameLength + TarConstants.maxPrefixLength { + // Path too long even with prefix - need PAX + return nil + } + + // Try to split path into prefix and name + var nameBytes: [UInt8] + var prefixBytes: [UInt8] = [] + + if pathBytes.count <= TarConstants.maxNameLength { + nameBytes = pathBytes + } else { + // Find a slash to split on + guard let splitIndex = Self.findPathSplit(pathBytes) else { + // Can't split - need PAX + return nil + } + prefixBytes = Array(pathBytes[0.. TarConstants.maxTraditionalSize { + return nil + } + + // Name field + for (i, byte) in nameBytes.prefix(TarConstants.maxNameLength).enumerated() { + header[TarHeaderField.nameOffset + i] = byte + } + + // Mode + let modeOctal = Self.formatOctal(Int64(mode), width: TarHeaderField.modeSize) + for (i, byte) in modeOctal.enumerated() { + header[TarHeaderField.modeOffset + i] = byte + } + + // UID + let uidOctal = Self.formatOctal(Int64(uid), width: TarHeaderField.uidSize) + for (i, byte) in uidOctal.enumerated() { + header[TarHeaderField.uidOffset + i] = byte + } + + // GID + let gidOctal = Self.formatOctal(Int64(gid), width: TarHeaderField.gidSize) + for (i, byte) in gidOctal.enumerated() { + header[TarHeaderField.gidOffset + i] = byte + } + + // Size + let sizeOctal = Self.formatOctal(size, width: TarHeaderField.sizeSize) + for (i, byte) in sizeOctal.enumerated() { + header[TarHeaderField.sizeOffset + i] = byte + } + + // Modification time + let mtimeOctal = Self.formatOctal(mtime, width: TarHeaderField.mtimeSize) + for (i, byte) in mtimeOctal.enumerated() { + header[TarHeaderField.mtimeOffset + i] = byte + } + + // Checksum placeholder (spaces for calculation) + for i in 0.. Int? 
{ + // Need to find a '/' such that: + // - prefix (before '/') is <= 155 bytes + // - name (after '/') is <= 100 bytes + let slash = UInt8(ascii: "/") + + for i in stride(from: min(pathBytes.count - 1, TarConstants.maxPrefixLength), through: 0, by: -1) { + if pathBytes[i] == slash { + let remainingLength = pathBytes.count - i - 1 + if remainingLength <= TarConstants.maxNameLength { + return i + } + } + } + return nil + } + + /// Calculate the TAR header checksum. + private func calculateChecksum(_ header: [UInt8]) -> Int { + var sum = 0 + for byte in header { + sum += Int(byte) + } + return sum + } +} + +// MARK: - Header Parsing + +extension TarHeader { + /// Parse a TAR header from a 512-byte block. + static func parse(from block: [UInt8]) -> TarHeader? { + guard block.count >= TarConstants.blockSize else { + return nil + } + + // Check if this is an empty block (end of archive) + if block.allSatisfy({ $0 == 0 }) { + return nil + } + + // Verify checksum + guard verifyChecksum(block) else { + return nil + } + + // Parse name (may need to combine with prefix) + let nameSlice = block[TarHeaderField.nameOffset..<(TarHeaderField.nameOffset + TarHeaderField.nameSize)] + let prefixSlice = block[TarHeaderField.prefixOffset..<(TarHeaderField.prefixOffset + TarHeaderField.prefixSize)] + + let name = parseString(nameSlice) + let prefix = parseString(prefixSlice) + + let path: String + if prefix.isEmpty { + path = name + } else { + path = prefix + "/" + name + } + + // Parse other fields + let modeSlice = block[TarHeaderField.modeOffset..<(TarHeaderField.modeOffset + TarHeaderField.modeSize)] + let uidSlice = block[TarHeaderField.uidOffset..<(TarHeaderField.uidOffset + TarHeaderField.uidSize)] + let gidSlice = block[TarHeaderField.gidOffset..<(TarHeaderField.gidOffset + TarHeaderField.gidSize)] + let sizeSlice = block[TarHeaderField.sizeOffset..<(TarHeaderField.sizeOffset + TarHeaderField.sizeSize)] + let mtimeSlice = 
block[TarHeaderField.mtimeOffset..<(TarHeaderField.mtimeOffset + TarHeaderField.mtimeSize)] + let linkNameSlice = block[TarHeaderField.linkNameOffset..<(TarHeaderField.linkNameOffset + TarHeaderField.linkNameSize)] + let unameSlice = block[TarHeaderField.unameOffset..<(TarHeaderField.unameOffset + TarHeaderField.unameSize)] + let gnameSlice = block[TarHeaderField.gnameOffset..<(TarHeaderField.gnameOffset + TarHeaderField.gnameSize)] + let devMajorSlice = block[TarHeaderField.devMajorOffset..<(TarHeaderField.devMajorOffset + TarHeaderField.devMajorSize)] + let devMinorSlice = block[TarHeaderField.devMinorOffset..<(TarHeaderField.devMinorOffset + TarHeaderField.devMinorSize)] + + let typeFlag = block[TarHeaderField.typeFlagOffset] + let entryType = TarEntryType(rawValue: typeFlag) ?? .regular + + return TarHeader( + path: path, + mode: UInt32(parseOctal(modeSlice)), + uid: UInt32(parseOctal(uidSlice)), + gid: UInt32(parseOctal(gidSlice)), + size: parseOctal(sizeSlice), + mtime: parseOctal(mtimeSlice), + entryType: entryType, + linkName: parseString(linkNameSlice), + userName: parseString(unameSlice), + groupName: parseString(gnameSlice), + deviceMajor: UInt32(parseOctal(devMajorSlice)), + deviceMinor: UInt32(parseOctal(devMinorSlice)) + ) + } + + /// Verify the checksum of a TAR header block. 
+ private static func verifyChecksum(_ block: [UInt8]) -> Bool { + // Get the stored checksum + let checksumSlice = block[TarHeaderField.checksumOffset..<(TarHeaderField.checksumOffset + TarHeaderField.checksumSize)] + let storedChecksum = parseOctal(checksumSlice) + + // Calculate checksum (treating checksum field as spaces) + var calculatedChecksum = 0 + for (i, byte) in block.enumerated() { + if i >= TarHeaderField.checksumOffset && i < TarHeaderField.checksumOffset + TarHeaderField.checksumSize { + calculatedChecksum += 0x20 // space + } else { + calculatedChecksum += Int(byte) + } + } + + return storedChecksum == Int64(calculatedChecksum) + } +} diff --git a/Sources/ContainerizationTar/TarPax.swift b/Sources/ContainerizationTar/TarPax.swift new file mode 100644 index 00000000..59a17a80 --- /dev/null +++ b/Sources/ContainerizationTar/TarPax.swift @@ -0,0 +1,328 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +/// PAX extended header support for TAR archives. 
+/// +/// PAX headers allow storing extended metadata that doesn't fit in the +/// traditional TAR header format: +/// - Arbitrary length file paths +/// - File sizes > 8GB +/// - Sub-second timestamps +/// - Large UID/GID values +/// - UTF-8 file names +/// +/// Format: Each record is `LENGTH KEY=VALUE\n` where LENGTH includes itself. +package enum TarPax { + /// Standard PAX keywords. + package enum Keyword { + package static let path = "path" + package static let linkpath = "linkpath" + package static let size = "size" + package static let uid = "uid" + package static let gid = "gid" + package static let uname = "uname" + package static let gname = "gname" + package static let mtime = "mtime" + package static let atime = "atime" + package static let ctime = "ctime" + } + + /// Create a PAX record with the format: "LENGTH KEY=VALUE\n" + /// The length includes the length field itself, which requires iteration to compute. + package static func makeRecord(key: String, value: String) -> [UInt8] { + // Content is: " key=value\n" (note leading space after length) + let content = " \(key)=\(value)\n" + let contentBytes = Array(content.utf8) + + // Calculate the total length including the length field itself. + // This requires iteration because the length field's size affects the total. + var lengthDigits = 1 + var totalLength = contentBytes.count + lengthDigits + + while String(totalLength).count > lengthDigits { + lengthDigits = String(totalLength).count + totalLength = contentBytes.count + lengthDigits + } + + // Build the final record + let lengthString = String(totalLength) + return Array(lengthString.utf8) + contentBytes + } + + /// Parse PAX extended header data into key-value pairs. 
+ package static func parseRecords(_ data: [UInt8]) -> [String: String] { + var result: [String: String] = [:] + var offset = 0 + + while offset < data.count { + // Parse length + var lengthEnd = offset + while lengthEnd < data.count && data[lengthEnd] != 0x20 { // space + lengthEnd += 1 + } + + guard lengthEnd < data.count else { break } + + let lengthBytes = data[offset.. recordStart else { break } + + // Record format is "key=value\n" + let recordBytes = data[recordStart.. TarConstants.maxNameLength + TarConstants.maxPrefixLength { + // Too long even with prefix + return true + } + // Check if there's a valid split point + if TarHeader.findPathSplit(pathBytes) == nil { + // No valid split point, need PAX + return true + } + } + + // Link name too long + if header.linkName.utf8.count > TarHeaderField.linkNameSize { + return true + } + + // File size too large + if header.size > TarConstants.maxTraditionalSize { + return true + } + + // UID/GID too large (max 7 octal digits = 2097151) + if header.uid > 2_097_151 || header.gid > 2_097_151 { + return true + } + + return false + } + + /// Build PAX extended header data for a given header. 
+ package static func buildExtendedData(for header: TarHeader) -> [UInt8] { + var records: [UInt8] = [] + + // Path (always include if PAX is needed, regardless of why) + if header.path.utf8.count > TarConstants.maxNameLength { + records.append(contentsOf: makeRecord(key: Keyword.path, value: header.path)) + } + + // Link path + if header.linkName.utf8.count > TarHeaderField.linkNameSize { + records.append(contentsOf: makeRecord(key: Keyword.linkpath, value: header.linkName)) + } + + // Size + if header.size > TarConstants.maxTraditionalSize { + records.append(contentsOf: makeRecord(key: Keyword.size, value: String(header.size))) + } + + // UID + if header.uid > 2_097_151 { + records.append(contentsOf: makeRecord(key: Keyword.uid, value: String(header.uid))) + } + + // GID + if header.gid > 2_097_151 { + records.append(contentsOf: makeRecord(key: Keyword.gid, value: String(header.gid))) + } + + return records + } + + /// Create a PAX extended header entry. + /// Returns the complete header block(s) including the PAX data. + package static func createPaxEntry(for header: TarHeader) -> [UInt8] { + let paxData = buildExtendedData(for: header) + + guard !paxData.isEmpty else { + return [] + } + + let paxHeader = TarHeader( + path: TarConstants.paxHeaderName, + mode: 0o644, + uid: 0, + gid: 0, + size: Int64(paxData.count), + mtime: header.mtime, + entryType: .paxExtended, + userName: header.userName, + groupName: header.groupName + ) + + let headerBlock: [UInt8] + if let serialized = paxHeader.serialize() { + headerBlock = serialized + } else { + // Fallback: create minimal header manually + headerBlock = createMinimalPaxHeader(size: paxData.count, mtime: header.mtime) + } + + let paddedData = padToBlockBoundary(paxData) + + return headerBlock + paddedData + } + + /// Create a minimal PAX header block when normal serialization fails. 
+ private static func createMinimalPaxHeader(size: Int, mtime: Int64) -> [UInt8] { + var header = [UInt8](repeating: 0, count: TarConstants.blockSize) + + // Name: ././@PaxHeader + let name = Array(TarConstants.paxHeaderName.utf8) + for (i, byte) in name.prefix(TarHeaderField.nameSize).enumerated() { + header[TarHeaderField.nameOffset + i] = byte + } + + // Mode: 0644 + let modeOctal = TarHeader.formatOctal(0o644, width: TarHeaderField.modeSize) + for (i, byte) in modeOctal.enumerated() { + header[TarHeaderField.modeOffset + i] = byte + } + + // UID: 0 + let uidOctal = TarHeader.formatOctal(0, width: TarHeaderField.uidSize) + for (i, byte) in uidOctal.enumerated() { + header[TarHeaderField.uidOffset + i] = byte + } + + // GID: 0 + let gidOctal = TarHeader.formatOctal(0, width: TarHeaderField.gidSize) + for (i, byte) in gidOctal.enumerated() { + header[TarHeaderField.gidOffset + i] = byte + } + + // Size + let sizeOctal = TarHeader.formatOctal(Int64(size), width: TarHeaderField.sizeSize) + for (i, byte) in sizeOctal.enumerated() { + header[TarHeaderField.sizeOffset + i] = byte + } + + // Mtime + let mtimeOctal = TarHeader.formatOctal(mtime, width: TarHeaderField.mtimeSize) + for (i, byte) in mtimeOctal.enumerated() { + header[TarHeaderField.mtimeOffset + i] = byte + } + + // Checksum placeholder (spaces) + for i in 0.. [UInt8] { + let remainder = data.count % TarConstants.blockSize + if remainder == 0 { + return data + } + + let paddingNeeded = TarConstants.blockSize - remainder + return data + [UInt8](repeating: 0, count: paddingNeeded) + } + + /// Apply PAX overrides to a parsed header. 
+ package static func applyOverrides(_ paxData: [String: String], to header: inout TarHeader) { + if let path = paxData[Keyword.path] { + header.path = path + } + + if let linkpath = paxData[Keyword.linkpath] { + header.linkName = linkpath + } + + if let sizeString = paxData[Keyword.size], let size = Int64(sizeString) { + header.size = size + } + + if let uidString = paxData[Keyword.uid], let uid = UInt32(uidString) { + header.uid = uid + } + + if let gidString = paxData[Keyword.gid], let gid = UInt32(gidString) { + header.gid = gid + } + + if let uname = paxData[Keyword.uname] { + header.userName = uname + } + + if let gname = paxData[Keyword.gname] { + header.groupName = gname + } + } +} diff --git a/Sources/ContainerizationTar/TarReader.swift b/Sources/ContainerizationTar/TarReader.swift new file mode 100644 index 00000000..a2dc5f95 --- /dev/null +++ b/Sources/ContainerizationTar/TarReader.swift @@ -0,0 +1,340 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +import SystemPackage + +#if canImport(_NIOFileSystem) +import NIOCore +import _NIOFileSystem +#endif + +/// Errors that can occur during TAR reading. +public enum TarReaderError: Error, Sendable { + /// Unexpected end of archive. 
+ case unexpectedEndOfArchive + + /// Invalid header (checksum failed or corrupt). + case invalidHeader + + /// Failed to parse PAX extended data. + case invalidPaxData + + /// PAX data exceeds maximum allowed size. + case paxDataTooLarge(Int) + + /// I/O error during reading. + case ioError(Errno) + + /// Invalid state. + case invalidState(String) + + /// Entry type not supported. + case unsupportedEntryType(UInt8) + + /// Path traversal attempt detected. + case pathTraversal(String) +} + +/// A TAR archive reader with PAX support. +/// +/// Example usage: +/// ```swift +/// let reader = try TarReader(fileDescriptor: fd) +/// +/// let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 64 * 1024, alignment: 1) +/// defer { buffer.deallocate() } +/// +/// while let header = try reader.nextHeader() { +/// print("Entry: \(header.path)") +/// +/// if header.entryType.isRegularFile { +/// while reader.contentBytesRemaining > 0 { +/// let bytesRead = try reader.readContent(into: buffer) +/// // Process buffer[0.. TarHeader? { + if endOfArchive { + return nil + } + + // Skip any remaining content/padding from previous entry + try skipRemainingContent() + + while true { + try readExactInternal(into: &internalBuffer, count: TarConstants.blockSize) + + if internalBuffer[0.. TarConstants.maxPaxSize { + throw TarReaderError.paxDataTooLarge(paxSize) + } + if paxSize > internalBuffer.count { + internalBuffer = [UInt8](repeating: 0, count: paxSize) + } + try readExactInternal(into: &internalBuffer, count: paxSize) + paxOverrides = TarPax.parseRecords(Array(internalBuffer[0.. 
TarConstants.maxPaxSize { + throw TarReaderError.paxDataTooLarge(paxSize) + } + try skipBytes(paxSize) + try skipPadding(for: header.size) + continue + } + + // Apply PAX overrides if any + if !paxOverrides.isEmpty { + TarPax.applyOverrides(paxOverrides, to: &header) + paxOverrides.removeAll() + } + + currentHeader = header + contentBytesRemaining = header.size + + // Calculate padding that will need to be skipped + let remainder = Int(header.size % Int64(TarConstants.blockSize)) + paddingBytesRemaining = remainder == 0 ? 0 : TarConstants.blockSize - remainder + + return header + } + } + + /// Read content from the current entry into the provided buffer. + /// - Parameter buffer: The buffer to read into. Reads up to buffer.count bytes. + /// - Returns: The number of bytes read. Returns 0 when no content remains. + public func readContent(into buffer: UnsafeMutableRawBufferPointer) throws -> Int { + guard currentHeader != nil else { + throw TarReaderError.invalidState("No current entry - call nextHeader() first") + } + + guard contentBytesRemaining > 0, buffer.count > 0 else { + return 0 + } + + let toRead = min(Int(contentBytesRemaining), buffer.count) + var totalRead = 0 + + guard let baseAddress = buffer.baseAddress else { + return 0 + } + + while totalRead < toRead { + let remaining = UnsafeMutableRawBufferPointer( + start: baseAddress.advanced(by: totalRead), + count: toRead - totalRead + ) + let bytesRead = try fileDescriptor.read(into: remaining) + if bytesRead == 0 { + throw TarReaderError.unexpectedEndOfArchive + } + totalRead += bytesRead + } + + contentBytesRemaining -= Int64(totalRead) + + // If we've read all content, skip padding automatically + if contentBytesRemaining == 0 && paddingBytesRemaining > 0 { + try skipBytes(paddingBytesRemaining) + paddingBytesRemaining = 0 + } + + return totalRead + } + + /// Skip the remaining content of the current entry. + /// Call this if you don't need the content and want to move to the next entry. 
+ public func skipRemainingContent() throws { + while contentBytesRemaining > 0 { + let toSkip = min(Int(contentBytesRemaining), internalBuffer.count) + try readExactInternal(into: &internalBuffer, count: toSkip) + contentBytesRemaining -= Int64(toSkip) + } + + if paddingBytesRemaining > 0 { + try skipBytes(paddingBytesRemaining) + paddingBytesRemaining = 0 + } + + currentHeader = nil + } + + /// Copy the current entry's content to a destination file descriptor. + /// - Parameter destination: The file descriptor to write content to. + /// - Throws: `TarReaderError.invalidState` if no current entry exists. + public func readFile(to destination: FileDescriptor) throws { + guard currentHeader != nil else { + throw TarReaderError.invalidState("No current entry - call nextHeader() first") + } + + while contentBytesRemaining > 0 { + let toRead = min(Int(contentBytesRemaining), copyBuffer.count) + let bytesRead = try readContent(into: UnsafeMutableRawBufferPointer(rebasing: copyBuffer[0.. 0 { + let toRead = min(Int(contentBytesRemaining), copyBuffer.count) + let bytesRead = try readContent(into: UnsafeMutableRawBufferPointer(rebasing: copyBuffer[0.. 0 { + let paddingSize = TarConstants.blockSize - remainder + try skipBytes(paddingSize) + } + } + + /// Skip the specified number of bytes. + private func skipBytes(_ count: Int) throws { + var remaining = count + while remaining > 0 { + let toSkip = min(remaining, internalBuffer.count) + try readExactInternal(into: &internalBuffer, count: toSkip) + remaining -= toSkip + } + } +} diff --git a/Sources/ContainerizationTar/TarWriter.swift b/Sources/ContainerizationTar/TarWriter.swift new file mode 100644 index 00000000..2acd6ea4 --- /dev/null +++ b/Sources/ContainerizationTar/TarWriter.swift @@ -0,0 +1,481 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2026 Apple Inc. and the Containerization project authors. 
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+import SystemPackage
+
+#if canImport(_NIOFileSystem)
+import NIOCore
+import _NIOFileSystem
+#endif
+
+#if canImport(Musl)
+import Musl
+#elseif canImport(Glibc)
+import Glibc
+#elseif canImport(Darwin)
+import Darwin
+#endif
+
+/// Errors that can occur during TAR writing.
+///
+/// The enum is `Equatable` so callers and tests can match an exact case
+/// (including its associated values) instead of only the error type.
+public enum TarWriterError: Error, Sendable, Equatable {
+    /// The path is too long and cannot be represented.
+    /// The associated value is presumably the offending path (no throw site is
+    /// visible in this part of the file - confirm against the caller).
+    case pathTooLong(String)
+
+    /// Failed to serialize header.
+    /// Thrown when `TarHeader.serialize()` returns `nil` while writing a header.
+    case headerSerializationFailed
+
+    /// File size mismatch - wrote different amount than declared.
+    /// `expected` is the size announced for the entry, `actual` the number of
+    /// content bytes supplied for it.
+    case sizeMismatch(expected: Int64, actual: Int64)
+
+    /// I/O error during writing.
+    /// The associated value is the `errno` captured at the failure
+    /// (for example after a failed `fstat` on a source descriptor).
+    case ioError(Int32)
+
+    /// Write returned zero bytes unexpectedly.
+    /// Raised instead of retrying forever when the descriptor accepts no data.
+    case writeZeroBytes
+
+    /// Invalid entry state.
+    /// The message names the violated rule, e.g. writing after the archive was
+    /// finalized or starting a new entry before `finalizeEntry()` was called.
+    case invalidState(String)
+}
+
+/// A TAR archive writer with PAX support.
+///
+/// Entries are written strictly one at a time: an entry opened with `beginFile`
+/// must be closed with `finalizeEntry()` before any other entry can be started,
+/// otherwise the next write call throws `TarWriterError.invalidState`.
+///
+/// Example usage:
+/// ```swift
+/// let writer = TarWriter(fileDescriptor: fd)
+///
+/// // Write a directory
+/// try writer.writeDirectory(path: "mydir", mode: 0o755)
+///
+/// // Write a file with content from a buffer
+/// try writer.beginFile(path: "mydir/hello.txt", size: 13)
+/// try buffer.withUnsafeBytes { ptr in
+///     try writer.writeContent(ptr)
+/// }
+/// try writer.finalizeEntry()
+///
+/// // Write a symlink
+/// try writer.writeSymlink(path: "mydir/link", target: "hello.txt")
+///
+/// // Finalize the archive
+/// try writer.finalize()
+/// ```
+public final class TarWriter {
+    private let fileDescriptor: FileDescriptor
+    private let ownsFileDescriptor: Bool
+
+    /// Reusable buffer for streaming file content.
+    private let copyBuffer: UnsafeMutableRawBufferPointer
+
+    /// Track bytes written for current entry (for size validation).
+    private var currentEntryBytesWritten: Int64 = 0
+    /// Size announced by `beginFile` for the entry currently being written.
+    private var currentEntryExpectedSize: Int64 = 0
+    /// True between `beginFile` and `finalizeEntry`.
+    private var writingEntryContent = false
+
+    /// Set once the archive has been finalized; checked by `ensureNotFinalized()`.
+    private var finalized = false
+
+    /// Create a TAR writer from a file descriptor.
+    ///
+    /// Allocates a 128 KiB copy buffer that is released again in `deinit`.
+    /// - Parameters:
+    ///   - fileDescriptor: The file descriptor to write to.
+    ///   - ownsFileDescriptor: If true, the writer will close the file descriptor when done.
+    public init(fileDescriptor: FileDescriptor, ownsFileDescriptor: Bool = false) {
+        self.fileDescriptor = fileDescriptor
+        self.ownsFileDescriptor = ownsFileDescriptor
+        self.copyBuffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 128 * 1024, alignment: 1)
+    }
+
+    /// Create a TAR writer from a file path.
+    ///
+    /// The file is opened write-only, created if missing, truncated if present,
+    /// and given `rw-r--r--` permissions. The writer owns the resulting descriptor.
+    /// - Parameter path: The path to the TAR file to create.
+    /// - Throws: Any error raised by `FileDescriptor.open`.
+    public convenience init(path: FilePath) throws {
+        let fd = try FileDescriptor.open(
+            path,
+            .writeOnly,
+            options: [.create, .truncate],
+            permissions: [.ownerReadWrite, .groupRead, .otherRead]
+        )
+        self.init(fileDescriptor: fd, ownsFileDescriptor: true)
+    }
+
+    deinit {
+        copyBuffer.deallocate()
+        if ownsFileDescriptor {
+            // Best effort: there is nobody left to report a close failure to.
+            try? fileDescriptor.close()
+        }
+    }
+
+    /// Write a directory entry.
+    ///
+    /// A trailing `/` is appended to `path` when it is missing; the entry has size 0.
+    /// - Parameters:
+    ///   - path: The directory path inside the archive.
+    ///   - mode: Permission bits (default: 0o755).
+    ///   - uid: Owner user ID (default: 0).
+    ///   - gid: Owner group ID (default: 0).
+    ///   - mtime: Modification time as Unix timestamp (default: current time).
+    ///   - userName: Owner user name (default: "root").
+    ///   - groupName: Owner group name (default: "root").
+    /// - Throws: `TarWriterError.invalidState` if the archive is finalized or a
+    ///   file entry is still open, plus any error from writing the header.
+    public func writeDirectory(
+        path: String,
+        mode: UInt32 = 0o755,
+        uid: UInt32 = 0,
+        gid: UInt32 = 0,
+        mtime: Int64? = nil,
+        userName: String = "root",
+        groupName: String = "root"
+    ) throws {
+        try ensureNotFinalized()
+        try ensureNotWritingContent()
+
+        // Ensure path ends with /
+        var dirPath = path
+        if !dirPath.hasSuffix("/") {
+            dirPath += "/"
+        }
+
+        let header = TarHeader(
+            path: dirPath,
+            mode: mode,
+            uid: uid,
+            gid: gid,
+            size: 0,
+            mtime: mtime ?? currentTimestamp(),
+            entryType: .directory,
+            userName: userName,
+            groupName: groupName
+        )
+
+        try writeHeader(header)
+    }
+
+    /// Write a file entry header, preparing for streaming content.
+    /// After calling this, use `writeContent` to write the file data,
+    /// then call `finalizeEntry` when done.
+    /// - Parameters:
+    ///   - path: The file path inside the archive.
+    ///   - size: Exact number of content bytes that will follow.
+    ///   - mode: Permission bits (default: 0o644).
+    ///   - uid: Owner user ID (default: 0).
+    ///   - gid: Owner group ID (default: 0).
+    ///   - mtime: Modification time as Unix timestamp (default: current time).
+    ///   - userName: Owner user name (default: "root").
+    ///   - groupName: Owner group name (default: "root").
+    /// - Throws: `TarWriterError.invalidState` if the archive is finalized or a
+    ///   file entry is still open, plus any error from writing the header.
+    public func beginFile(
+        path: String,
+        size: Int64,
+        mode: UInt32 = 0o644,
+        uid: UInt32 = 0,
+        gid: UInt32 = 0,
+        mtime: Int64? = nil,
+        userName: String = "root",
+        groupName: String = "root"
+    ) throws {
+        try ensureNotFinalized()
+        try ensureNotWritingContent()
+
+        let header = TarHeader(
+            path: path,
+            mode: mode,
+            uid: uid,
+            gid: gid,
+            size: size,
+            mtime: mtime ?? currentTimestamp(),
+            entryType: .regular,
+            userName: userName,
+            groupName: groupName
+        )
+
+        try writeHeader(header)
+
+        // Only mark the entry open once the header actually reached the descriptor.
+        currentEntryExpectedSize = size
+        currentEntryBytesWritten = 0
+        writingEntryContent = true
+    }
+
+    /// Write content for the current file entry.
+    /// Must be called after `beginFile` and before `finalizeEntry`.
+    /// - Parameter buffer: The buffer containing data to write.
+    /// - Throws: `TarWriterError.invalidState` if no file entry is open or the
+    ///   archive is finalized; `TarWriterError.sizeMismatch` if this write would
+    ///   exceed the size declared in `beginFile` (nothing is written in that case);
+    ///   plus any error from the underlying write.
+    public func writeContent(_ buffer: UnsafeRawBufferPointer) throws {
+        try ensureNotFinalized()
+
+        guard writingEntryContent else {
+            throw TarWriterError.invalidState("Not currently writing file content")
+        }
+
+        // Reject an overrun before touching the descriptor: the header already
+        // promised `currentEntryExpectedSize` bytes, so surplus data would be
+        // interpreted as the next header and corrupt the archive.
+        let pendingTotal = currentEntryBytesWritten + Int64(buffer.count)
+        guard pendingTotal <= currentEntryExpectedSize else {
+            throw TarWriterError.sizeMismatch(
+                expected: currentEntryExpectedSize,
+                actual: pendingTotal
+            )
+        }
+
+        try writeAll(buffer)
+        currentEntryBytesWritten = pendingTotal
+    }
+
+    /// Finalize the current entry, adding padding if needed.
+    public func finalizeEntry() throws {
+        try ensureNotFinalized()
+
+        if !writingEntryContent {
+            throw TarWriterError.invalidState("Not currently writing file content")
+        }
+
+        // The header announced an exact size; anything else is an error.
+        let declared = currentEntryExpectedSize
+        let written = currentEntryBytesWritten
+        guard written == declared else {
+            throw TarWriterError.sizeMismatch(expected: declared, actual: written)
+        }
+
+        try writePadding(for: written)
+
+        // Entry closed: clear the per-entry bookkeeping.
+        currentEntryExpectedSize = 0
+        currentEntryBytesWritten = 0
+        writingEntryContent = false
+    }
+
+    /// Write a symbolic link entry.
+    ///
+    /// The entry carries no content (size 0) and is always recorded with mode 0o777.
+    public func writeSymlink(
+        path: String,
+        target: String,
+        uid: UInt32 = 0,
+        gid: UInt32 = 0,
+        mtime: Int64? = nil,
+        userName: String = "root",
+        groupName: String = "root"
+    ) throws {
+        try writeLinkEntry(
+            kind: .symbolicLink,
+            mode: 0o777,
+            path: path,
+            target: target,
+            uid: uid,
+            gid: gid,
+            mtime: mtime,
+            userName: userName,
+            groupName: groupName
+        )
+    }
+
+    /// Write a hard link entry.
+    ///
+    /// The entry carries no content (size 0) and is always recorded with mode 0o644.
+    public func writeHardLink(
+        path: String,
+        target: String,
+        uid: UInt32 = 0,
+        gid: UInt32 = 0,
+        mtime: Int64? = nil,
+        userName: String = "root",
+        groupName: String = "root"
+    ) throws {
+        try writeLinkEntry(
+            kind: .hardLink,
+            mode: 0o644,
+            path: path,
+            target: target,
+            uid: uid,
+            gid: gid,
+            mtime: mtime,
+            userName: userName,
+            groupName: groupName
+        )
+    }
+
+    /// Shared body of `writeSymlink` and `writeHardLink`: validates writer state,
+    /// then emits a zero-size header of the given link kind pointing at `target`.
+    private func writeLinkEntry(
+        kind: TarEntryType,
+        mode: UInt32,
+        path: String,
+        target: String,
+        uid: UInt32,
+        gid: UInt32,
+        mtime: Int64?,
+        userName: String,
+        groupName: String
+    ) throws {
+        try ensureNotFinalized()
+        try ensureNotWritingContent()
+
+        // The clock is only consulted after the state checks have passed.
+        let timestamp = mtime ?? currentTimestamp()
+        let linkHeader = TarHeader(
+            path: path,
+            mode: mode,
+            uid: uid,
+            gid: gid,
+            size: 0,
+            mtime: timestamp,
+            entryType: kind,
+            linkName: target,
+            userName: userName,
+            groupName: groupName
+        )
+
+        try writeHeader(linkHeader)
+    }
+
+    /// Write a file entry by reading content from a file descriptor.
+    /// The file size is determined automatically via fstat.
+    /// - Parameters:
+    ///   - path: The path for the entry in the archive.
+    ///   - source: The file descriptor to read content from.
+    ///   - mode: File mode/permissions (default: 0o644).
+    ///   - uid: Owner user ID (default: 0).
+    ///   - gid: Owner group ID (default: 0).
+ /// - mtime: Modification time as Unix timestamp (default: current time). + /// - userName: Owner user name (default: "root"). + /// - groupName: Owner group name (default: "root"). + public func writeFile( + path: String, + from source: FileDescriptor, + mode: UInt32 = 0o644, + uid: UInt32 = 0, + gid: UInt32 = 0, + mtime: Int64? = nil, + userName: String = "root", + groupName: String = "root" + ) throws { + try ensureNotFinalized() + try ensureNotWritingContent() + + var statBuf = stat() + guard fstat(source.rawValue, &statBuf) == 0 else { + throw TarWriterError.ioError(errno) + } + let size = Int64(statBuf.st_size) + + let header = TarHeader( + path: path, + mode: mode, + uid: uid, + gid: gid, + size: size, + mtime: mtime ?? currentTimestamp(), + entryType: .regular, + userName: userName, + groupName: groupName + ) + try writeHeader(header) + + var remaining = size + while remaining > 0 { + let toRead = min(Int(remaining), copyBuffer.count) + let readBuffer = UnsafeMutableRawBufferPointer(rebasing: copyBuffer[0.. 
TarConstants.maxNameLength { + // Truncate path to last 100 chars for fallback + let pathBytes = Array(header.path.utf8) + truncatedHeader.path = String(decoding: pathBytes.suffix(TarConstants.maxNameLength), as: UTF8.self) + } + if header.linkName.utf8.count > TarHeaderField.linkNameSize { + let linkBytes = Array(header.linkName.utf8) + truncatedHeader.linkName = String(decoding: linkBytes.suffix(TarHeaderField.linkNameSize), as: UTF8.self) + } + if header.size > TarConstants.maxTraditionalSize { + truncatedHeader.size = TarConstants.maxTraditionalSize + } + + guard let headerBlock = truncatedHeader.serialize() else { + throw TarWriterError.headerSerializationFailed + } + try headerBlock.withUnsafeBytes { ptr in + try writeAll(ptr) + } + } else { + // No PAX needed, write regular header + guard let headerBlock = header.serialize() else { + throw TarWriterError.headerSerializationFailed + } + try headerBlock.withUnsafeBytes { ptr in + try writeAll(ptr) + } + } + } + + /// Write padding to align to 512-byte boundary. + private func writePadding(for size: Int64) throws { + let remainder = Int(size % Int64(TarConstants.blockSize)) + if remainder > 0 { + let padding = [UInt8](repeating: 0, count: TarConstants.blockSize - remainder) + try padding.withUnsafeBytes { ptr in + try writeAll(ptr) + } + } + } + + /// Write all bytes from the buffer to the file descriptor. 
+    private func writeAll(_ buffer: UnsafeRawBufferPointer) throws {
+        // Keep handing the unwritten tail to the descriptor until nothing is left;
+        // a single write call may accept fewer bytes than offered.
+        var pending = buffer[...]
+        while !pending.isEmpty {
+            let accepted = try fileDescriptor.write(UnsafeRawBufferPointer(rebasing: pending))
+            guard accepted != 0 else {
+                // No progress: bail out rather than spin forever.
+                throw TarWriterError.writeZeroBytes
+            }
+            pending = pending.dropFirst(accepted)
+        }
+    }
+
+    /// Throws `invalidState` once the archive has been finalized.
+    private func ensureNotFinalized() throws {
+        guard !finalized else {
+            throw TarWriterError.invalidState("Archive has been finalized")
+        }
+    }
+
+    /// Throws `invalidState` while a file entry opened by `beginFile` is still open.
+    private func ensureNotWritingContent() throws {
+        guard !writingEntryContent else {
+            throw TarWriterError.invalidState("Must call finalizeEntry() before writing another entry")
+        }
+    }
+
+    /// Current wall-clock time in whole seconds, as reported by `gettimeofday`.
+    private func currentTimestamp() -> Int64 {
+        var now = timeval()
+        gettimeofday(&now, nil)
+        let seconds = Int64(now.tv_sec)
+        return seconds
+    }
+}
diff --git a/Tests/ContainerizationTarTests/TarTests.swift b/Tests/ContainerizationTarTests/TarTests.swift
new file mode 100644
index 00000000..fc84ffac
--- /dev/null
+++ b/Tests/ContainerizationTarTests/TarTests.swift
@@ -0,0 +1,1236 @@
+//===----------------------------------------------------------------------===//
+// Copyright © 2026 Apple Inc. and the Containerization project authors.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//   https://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//===----------------------------------------------------------------------===//
+
+import Foundation
+import SystemPackage
+import Testing
+
+@testable import _ContainerizationTar
+
+#if canImport(_NIOFileSystem)
+import NIOCore
+import _NIOFileSystem
+#endif
+
+// MARK: - TarHeader Tests
+
+/// Unit tests for the header field codecs (octal numbers, NUL-terminated strings)
+/// and for whole-header serialize/parse behavior.
+@Suite("TarHeader Tests")
+struct TarHeaderTests {
+
+    // MARK: - Octal Encoding Tests
+
+    @Test("Format octal - zero")
+    func formatOctalZero() {
+        let result = TarHeader.formatOctal(0, width: 8)
+        // Should be "0000000\0"
+        #expect(result.count == 8)
+        #expect(result[0] == 0x30)  // '0'
+        #expect(result[6] == 0x30)  // '0'
+        #expect(result[7] == 0x00)  // null terminator
+    }
+
+    @Test("Format octal - small number")
+    func formatOctalSmall() {
+        let result = TarHeader.formatOctal(0o755, width: 8)
+        let string = String(decoding: result.dropLast(), as: UTF8.self)
+        #expect(string == "0000755")
+    }
+
+    @Test("Format octal - file size")
+    func formatOctalFileSize() {
+        let result = TarHeader.formatOctal(1234, width: 12)
+        // 1234 in octal is 2322; the field is zero-padded to width - 1 digits,
+        // so pin the whole field rather than just a substring.
+        let string = String(decoding: result.dropLast(), as: UTF8.self)
+        #expect(string == "00000002322")
+    }
+
+    @Test("Format octal - max traditional size")
+    func formatOctalMaxSize() {
+        let maxSize: Int64 = 0o77777777777  // ~8GB
+        let result = TarHeader.formatOctal(maxSize, width: 12)
+        let string = String(decoding: result.dropLast(), as: UTF8.self)
+        #expect(string == "77777777777")
+    }
+
+    // MARK: - Octal Parsing Tests
+
+    @Test("Parse octal - zero")
+    func parseOctalZero() {
+        let bytes: [UInt8] = Array("0000000\0".utf8)
+        let result = TarHeader.parseOctal(bytes[...])
+        #expect(result == 0)
+    }
+
+    @Test("Parse octal - permissions")
+    func parseOctalPermissions() {
+        let bytes: [UInt8] = Array("0000755\0".utf8)
+        let result = TarHeader.parseOctal(bytes[...])
+        #expect(result == 0o755)
+    }
+
+    @Test("Parse octal - with spaces")
+    func parseOctalWithSpaces() {
+        let bytes: [UInt8] = Array(" 755 \0".utf8)
+        let result = TarHeader.parseOctal(bytes[...])
+        #expect(result == 0o755)
+    }
+
+    @Test("Parse octal - file size")
+    func parseOctalFileSize() {
+        let bytes: [UInt8] = Array("00000002322\0".utf8)
+        let result = TarHeader.parseOctal(bytes[...])
+        #expect(result == 1234)
+    }
+
+    // MARK: - String Parsing Tests
+
+    @Test("Parse string - null terminated")
+    func parseStringNullTerminated() {
+        var bytes: [UInt8] = Array("hello.txt".utf8)
+        bytes.append(0)
+        bytes.append(contentsOf: [0, 0, 0])  // Padding
+        let result = TarHeader.parseString(bytes[...])
+        #expect(result == "hello.txt")
+    }
+
+    @Test("Parse string - full field")
+    func parseStringFullField() {
+        let bytes: [UInt8] = Array("thisisaverylongfilename".utf8)
+        let result = TarHeader.parseString(bytes[...])
+        #expect(result == "thisisaverylongfilename")
+    }
+
+    // MARK: - Header Serialization Tests
+
+    @Test("Serialize simple header")
+    func serializeSimpleHeader() throws {
+        let header = TarHeader(
+            path: "hello.txt",
+            mode: 0o644,
+            uid: 1000,
+            gid: 1000,
+            size: 13,
+            mtime: 1_704_067_200,  // 2024-01-01 00:00:00 UTC
+            entryType: .regular,
+            userName: "user",
+            groupName: "group"
+        )
+
+        let serialized = try #require(header.serialize())
+        #expect(serialized.count == 512)
+
+        // Verify name field
+        let name = TarHeader.parseString(serialized[0..<100])
+        #expect(name == "hello.txt")
+
+        // Verify magic
+        let magic = Array(serialized[257..<263])
+        #expect(magic == TarConstants.magic)
+
+        // Verify version
+        let version = Array(serialized[263..<265])
+        #expect(version == TarConstants.version)
+    }
+
+    @Test("Serialize directory header")
+    func serializeDirectoryHeader() throws {
+        let header = TarHeader(
+            path: "mydir/",
+            mode: 0o755,
+            entryType: .directory
+        )
+
+        let serialized = try #require(header.serialize())
+
+        // Verify type flag
+        #expect(serialized[156] == TarEntryType.directory.rawValue)
+
+        // Verify name ends with /
+        let name = TarHeader.parseString(serialized[0..<100])
+        #expect(name.hasSuffix("/"))
+    }
+
+    @Test("Serialize returns nil for long path")
+    func serializeLongPathReturnsNil() {
+        // Path longer than 255 bytes (100 name + 155 prefix)
+        let longPath = String(repeating: "a", count: 300)
+        let header = TarHeader(path: longPath)
+
+        let serialized = header.serialize()
+        #expect(serialized == nil)
+    }
+
+    // MARK: - Header Parsing Tests
+
+    @Test("Parse serialized header roundtrip")
+    func parseSerializedHeaderRoundtrip() throws {
+        let original = TarHeader(
+            path: "test/file.txt",
+            mode: 0o644,
+            uid: 1000,
+            gid: 1000,
+            size: 12345,
+            mtime: 1_704_067_200,
+            entryType: .regular,
+            userName: "testuser",
+            groupName: "testgroup"
+        )
+
+        let serialized = try #require(original.serialize())
+        let parsed = try #require(TarHeader.parse(from: serialized))
+
+        #expect(parsed.path == original.path)
+        #expect(parsed.mode == original.mode)
+        #expect(parsed.uid == original.uid)
+        #expect(parsed.gid == original.gid)
+        #expect(parsed.size == original.size)
+        #expect(parsed.mtime == original.mtime)
+        #expect(parsed.entryType == original.entryType)
+        #expect(parsed.userName == original.userName)
+        #expect(parsed.groupName == original.groupName)
+    }
+
+    @Test("Parse empty block returns nil")
+    func parseEmptyBlockReturnsNil() {
+        let emptyBlock = [UInt8](repeating: 0, count: 512)
+        let result = TarHeader.parse(from: emptyBlock)
+        #expect(result == nil)
+    }
+
+    @Test("Parse corrupted header returns nil")
+    func parseCorruptedHeaderReturnsNil() throws {
+        let header = TarHeader(path: "test.txt", size: 100)
+        var serialized = try #require(header.serialize())
+
+        // Corrupt the checksum
+        serialized[148] = 0xFF
+        serialized[149] = 0xFF
+
+        let result = TarHeader.parse(from: serialized)
+        #expect(result == nil)
+    }
+
+    // MARK: - Entry Type Tests
+
+    @Test("Entry type regular file detection")
+    func entryTypeRegularFile() {
+        #expect(TarEntryType.regular.isRegularFile)
+        #expect(TarEntryType.regularAlt.isRegularFile)
+        #expect(!TarEntryType.directory.isRegularFile)
+        #expect(!TarEntryType.symbolicLink.isRegularFile)
+    }
+}
+
+// MARK: - TarPax Tests
+
+/// Unit tests for PAX extended-header record encoding/decoding and for the
+/// "does this header need PAX" / "apply PAX overrides" helpers.
+@Suite("TarPax Tests")
+struct TarPaxTests {
+
+    @Test("Make PAX record - short value")
+    func makePaxRecordShort() throws {
+        let record = TarPax.makeRecord(key: "path", value: "test.txt")
+        let string = String(decoding: record, as: UTF8.self)
+
+        // Format: "LENGTH path=test.txt\n"
+        #expect(string.hasSuffix("\n"))
+        #expect(string.contains("path=test.txt"))
+
+        // Verify length is correct (a malformed prefix fails the test instead of crashing it)
+        let lengthField = try #require(string.split(separator: " ", maxSplits: 1).first)
+        let declaredLength = try #require(Int(lengthField))
+        #expect(declaredLength == record.count)
+    }
+
+    @Test("Make PAX record - long value")
+    func makePaxRecordLong() throws {
+        let longPath = String(repeating: "a", count: 200)
+        let record = TarPax.makeRecord(key: "path", value: longPath)
+        let string = String(decoding: record, as: UTF8.self)
+
+        // Verify length is correct (length field will be 3 digits)
+        let lengthField = try #require(string.split(separator: " ", maxSplits: 1).first)
+        let declaredLength = try #require(Int(lengthField))
+        #expect(declaredLength == record.count)
+    }
+
+    @Test("Make PAX record - length crosses digit boundary")
+    func makePaxRecordLengthCrossesBoundary() throws {
+        // The length prefix counts itself. For key "a" the part after the prefix is
+        // " a=" + value + "\n", i.e. 4 + value.count bytes.
+        //
+        // value.count == 4: 8 bytes + a 1-digit prefix = 9, still one digit.
+        let below = TarPax.makeRecord(key: "a", value: "bbbb")
+        #expect(String(decoding: below, as: UTF8.self) == "9 a=bbbb\n")
+
+        // value.count == 5: 9 bytes + a 1-digit prefix would be 10, which itself needs
+        // two digits, so the encoder has to settle on 11.
+        let record = TarPax.makeRecord(key: "a", value: "bbbbb")
+        let string = String(decoding: record, as: UTF8.self)
+        #expect(string == "11 a=bbbbb\n")
+
+        let lengthField = try #require(string.split(separator: " ", maxSplits: 1).first)
+        let declaredLength = try #require(Int(lengthField))
+        #expect(declaredLength == record.count)
+    }
+
+    @Test("Parse PAX records - single record")
+    func parsePaxRecordsSingle() {
+        let record = TarPax.makeRecord(key: "path", value: "/long/path/to/file.txt")
+        let parsed = TarPax.parseRecords(record)
+
+        #expect(parsed["path"] == "/long/path/to/file.txt")
+    }
+
+    @Test("Parse PAX records - multiple records")
+    func parsePaxRecordsMultiple() {
+        var data: [UInt8] = []
+        data.append(contentsOf: TarPax.makeRecord(key: "path", value: "/some/path"))
+        data.append(contentsOf: TarPax.makeRecord(key: "size", value: "9999999999"))
+        data.append(contentsOf: TarPax.makeRecord(key: "uid", value: "65534"))
+
+        let parsed = TarPax.parseRecords(data)
+
+        #expect(parsed["path"] == "/some/path")
+        #expect(parsed["size"] == "9999999999")
+        #expect(parsed["uid"] == "65534")
+    }
+
+    @Test("Requires PAX - short path")
+    func requiresPaxShortPath() {
+        let header = TarHeader(path: "short.txt", size: 100)
+        #expect(!TarPax.requiresPax(header))
+    }
+
+    @Test("Requires PAX - long path")
+    func requiresPaxLongPath() {
+        let longPath = String(repeating: "a", count: 150)
+        let header = TarHeader(path: longPath, size: 100)
+        #expect(TarPax.requiresPax(header))
+    }
+
+    @Test("Requires PAX - large size")
+    func requiresPaxLargeSize() {
+        let header = TarHeader(path: "file.txt", size: 10_000_000_000)
+        #expect(TarPax.requiresPax(header))
+    }
+
+    @Test("Requires PAX - large UID")
+    func requiresPaxLargeUid() {
+        let header = TarHeader(path: "file.txt", uid: 3_000_000)
+        #expect(TarPax.requiresPax(header))
+    }
+
+    @Test("Apply overrides")
+    func applyOverrides() {
+        var header = TarHeader(
+            path: "truncated.txt",
+            uid: 0,
+            size: 100
+        )
+
+        let overrides = [
+            "path": "/very/long/path/to/file.txt",
+            "size": "999999999999",
+            "uid": "65534",
+        ]
+
+        TarPax.applyOverrides(overrides, to: &header)
+
+        #expect(header.path == "/very/long/path/to/file.txt")
+        #expect(header.size == 999_999_999_999)
+        #expect(header.uid == 65534)
+    }
+}
+
+// MARK: -
TarWriter/TarReader Roundtrip Tests + +@Suite("Tar Roundtrip Tests") +struct TarRoundtripTests { + + /// Helper to create a temporary file path. + func temporaryFilePath(name: String = "test.tar") -> FilePath { + let tempDir = FileManager.default.temporaryDirectory.path + let uuid = UUID().uuidString + return FilePath("\(tempDir)/\(uuid)-\(name)") + } + + /// Helper to clean up a temporary file. + func cleanup(_ path: FilePath) { + try? FileManager.default.removeItem(atPath: path.string) + } + + @Test("Write and read single file") + func writeAndReadSingleFile() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + let content = Array("Hello, World!".utf8) + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: "hello.txt", size: Int64(content.count), mode: 0o644) + try content.withUnsafeBytes { ptr in + try writer.writeContent(ptr) + } + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + try #require(header.path == "hello.txt") + try #require(header.size == Int64(content.count)) + try #require(header.mode == 0o644) + try #require(header.entryType == .regular) + + // Read content + let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 1024, alignment: 1) + defer { buffer.deallocate() } + + let bytesRead = try reader.readContent(into: buffer) + try #require(bytesRead == content.count) + + let readContent = Array(UnsafeRawBufferPointer(buffer)[0.. 0 { + let bytesRead = try reader.readContent(into: buffer) + readContent.append(contentsOf: UnsafeRawBufferPointer(buffer)[0.. 0 { + let bytesRead = try reader.readContent(into: buffer) + readData.append(contentsOf: UnsafeRawBufferPointer(buffer)[0.. FilePath { + let tempDir = FileManager.default.temporaryDirectory.path + let uuid = UUID().uuidString + return FilePath("\(tempDir)/\(uuid)-\(name)") + } + + func cleanup(_ path: FilePath) { + try? 
FileManager.default.removeItem(atPath: path.string) + } + + @Test("Async write file from readable handle") + func asyncWriteFileFromHandle() async throws { + let tarPath = temporaryFilePath() + defer { cleanup(tarPath) } + + let sourceDir = FileManager.default.temporaryDirectory.path + let sourceFile = "\(sourceDir)/\(UUID().uuidString)-source.txt" + defer { try? FileManager.default.removeItem(atPath: sourceFile) } + + let content = "Hello from async source file!\nMultiple lines here.\n" + try content.write(toFile: sourceFile, atomically: true, encoding: .utf8) + + let writer = try TarWriter(path: tarPath) + try await FileSystem.shared.withFileHandle(forReadingAt: FilePath(sourceFile)) { handle in + try await writer.writeFile(path: "async-copied.txt", from: handle, mode: 0o600) + } + try writer.finalize() + + let reader = try TarReader(path: tarPath) + let header = try #require(try reader.nextHeader()) + + #expect(header.path == "async-copied.txt") + #expect(header.size == Int64(content.utf8.count)) + #expect(header.mode == 0o600) + + let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 4096, alignment: 1) + defer { buffer.deallocate() } + + var readData = [UInt8]() + while reader.contentBytesRemaining > 0 { + let bytesRead = try reader.readContent(into: buffer) + readData.append(contentsOf: UnsafeRawBufferPointer(buffer)[0.. FilePath { + let tempDir = FileManager.default.temporaryDirectory.path + let uuid = UUID().uuidString + return FilePath("\(tempDir)/\(uuid)-\(name)") + } + + func cleanup(_ path: FilePath) { + try? 
FileManager.default.removeItem(atPath: path.string) + } + + @Test("Long path triggers PAX header") + func longPathTriggersPax() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + // Create a path with no valid split point (filename alone is > 100 chars) + let longFilename = String(repeating: "a", count: 120) + ".txt" + let longPath = "dir/" + longFilename + #expect(longPath.utf8.count > 100) + + let content = Array("content".utf8) + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: longPath, size: Int64(content.count)) + try content.withUnsafeBytes { try writer.writeContent($0) } + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + // The full path should be preserved via PAX + #expect(header.path == longPath) + } + + @Test("Very long path with PAX") + func veryLongPathWithPax() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + // Create a path longer than 255 characters (traditional max with prefix) + let longPath = + String(repeating: "a", count: 50) + "/" + String(repeating: "b", count: 50) + "/" + String(repeating: "c", count: 50) + "/" + String(repeating: "d", count: 50) + "/" + + String(repeating: "e", count: 50) + "/file.txt" + #expect(longPath.utf8.count > 255) + + let content = Array("test".utf8) + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: longPath, size: Int64(content.count)) + try content.withUnsafeBytes { try writer.writeContent($0) } + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.path == longPath) + } + + @Test("UTF-8 path preserved") + func utf8PathPreserved() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + let unicodePath = "目录/文件.txt" + let content = Array("内容".utf8) + + do { + let writer = try 
TarWriter(path: path) + try writer.beginFile(path: unicodePath, size: Int64(content.count)) + try content.withUnsafeBytes { try writer.writeContent($0) } + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.path == unicodePath) + } + + @Test("Long symlink target with PAX") + func longSymlinkTargetWithPax() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + // Create a symlink target longer than 100 characters + let longTarget = String(repeating: "x", count: 150) + "/target.txt" + + do { + let writer = try TarWriter(path: path) + try writer.writeSymlink(path: "link", target: longTarget) + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.path == "link") + #expect(header.linkName == longTarget) + #expect(header.entryType == .symbolicLink) + } +} + +// MARK: - Error Handling Tests + +@Suite("Tar Error Handling Tests") +struct TarErrorTests { + + @Test("Size mismatch error") + func sizeMismatchError() throws { + let tempPath = FilePath(FileManager.default.temporaryDirectory.path + "/\(UUID().uuidString).tar") + defer { try? FileManager.default.removeItem(atPath: tempPath.string) } + + let writer = try TarWriter(path: tempPath) + try writer.beginFile(path: "file.txt", size: 100) + + // Only write 50 bytes + let smallContent = [UInt8](repeating: 0x41, count: 50) + try smallContent.withUnsafeBytes { try writer.writeContent($0) } + + // Should throw size mismatch + #expect(throws: TarWriterError.self) { + try writer.finalizeEntry() + } + } + + @Test("Write after finalize error") + func writeAfterFinalizeError() throws { + let tempPath = FilePath(FileManager.default.temporaryDirectory.path + "/\(UUID().uuidString).tar") + defer { try? 
FileManager.default.removeItem(atPath: tempPath.string) } + + let writer = try TarWriter(path: tempPath) + try writer.finalize() + + #expect(throws: TarWriterError.self) { + try writer.writeDirectory(path: "dir") + } + } + + @Test("Reader invalid state error") + func readerInvalidStateError() throws { + let tempPath = FilePath(FileManager.default.temporaryDirectory.path + "/\(UUID().uuidString).tar") + defer { try? FileManager.default.removeItem(atPath: tempPath.string) } + + // Create empty tar + do { + let writer = try TarWriter(path: tempPath) + try writer.finalize() + } + + let reader = try TarReader(path: tempPath) + let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 100, alignment: 1) + defer { buffer.deallocate() } + + // Try to read content without calling nextHeader first + #expect(throws: TarReaderError.self) { + _ = try reader.readContent(into: buffer) + } + } +} + +// MARK: - Metadata Preservation Tests + +@Suite("Metadata Preservation Tests") +struct MetadataTests { + + func temporaryFilePath() -> FilePath { + let tempDir = FileManager.default.temporaryDirectory.path + return FilePath("\(tempDir)/\(UUID().uuidString).tar") + } + + func cleanup(_ path: FilePath) { + try? 
FileManager.default.removeItem(atPath: path.string) + } + + @Test("UID and GID preserved") + func uidGidPreserved() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: "file.txt", size: 0, uid: 1000, gid: 2000) + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.uid == 1000) + #expect(header.gid == 2000) + } + + @Test("Mtime preserved") + func mtimePreserved() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + let mtime: Int64 = 1_704_067_200 // 2024-01-01 00:00:00 UTC + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: "file.txt", size: 0, mtime: mtime) + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.mtime == mtime) + } + + @Test("User and group name preserved") + func userGroupNamePreserved() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + do { + let writer = try TarWriter(path: path) + try writer.beginFile(path: "file.txt", size: 0, userName: "testuser", groupName: "testgroup") + try writer.finalizeEntry() + try writer.finalize() + } + + let reader = try TarReader(path: path) + let header = try #require(try reader.nextHeader()) + + #expect(header.userName == "testuser") + #expect(header.groupName == "testgroup") + } + + @Test("Different file modes") + func differentFileModes() throws { + let path = temporaryFilePath() + defer { cleanup(path) } + + let modes: [UInt32] = [0o644, 0o755, 0o600, 0o777, 0o400] + + do { + let writer = try TarWriter(path: path) + for (i, mode) in modes.enumerated() { + try writer.beginFile(path: "file\(i).txt", size: 0, mode: mode) + try writer.finalizeEntry() + } + try writer.finalize() + } + + let reader = try TarReader(path: path) 
+ for (i, expectedMode) in modes.enumerated() { + let header = try #require(try reader.nextHeader()) + #expect(header.path == "file\(i).txt") + #expect(header.mode == expectedMode) + } + } +} + +// MARK: - System Tar Interoperability Tests + +@Suite("System Tar Interoperability Tests") +struct SystemTarTests { + + func temporaryDirectory() -> String { + let tempDir = FileManager.default.temporaryDirectory.path + let uuid = UUID().uuidString + let path = "\(tempDir)/\(uuid)" + try? FileManager.default.createDirectory(atPath: path, withIntermediateDirectories: true) + return path + } + + func cleanup(_ path: String) { + try? FileManager.default.removeItem(atPath: path) + } + + @Test("Read tar created by system tar") + func readSystemTar() throws { + let workDir = temporaryDirectory() + defer { cleanup(workDir) } + + let sourceDir = "\(workDir)/source" + try FileManager.default.createDirectory(atPath: sourceDir, withIntermediateDirectories: true) + + let file1Content = "Hello from file1" + let file2Content = "Content of file2 with more text" + try file1Content.write(toFile: "\(sourceDir)/file1.txt", atomically: true, encoding: .utf8) + try file2Content.write(toFile: "\(sourceDir)/file2.txt", atomically: true, encoding: .utf8) + try FileManager.default.createDirectory(atPath: "\(sourceDir)/subdir", withIntermediateDirectories: true) + try "nested".write(toFile: "\(sourceDir)/subdir/nested.txt", atomically: true, encoding: .utf8) + + let tarPath = "\(workDir)/test.tar" + let process = Process() + process.executableURL = URL(fileURLWithPath: "/usr/bin/tar") + process.arguments = ["-cf", tarPath, "-C", sourceDir, "."] + try process.run() + process.waitUntilExit() + #expect(process.terminationStatus == 0) + + let reader = try TarReader(path: FilePath(tarPath)) + let buffer = UnsafeMutableRawBufferPointer.allocate(byteCount: 4096, alignment: 1) + defer { buffer.deallocate() } + + var entries: [String: (TarEntryType, String)] = [:] + + while let header = try 
reader.nextHeader() { + var content = "" + if header.entryType.isRegularFile && header.size > 0 { + var data = [UInt8]() + while reader.contentBytesRemaining > 0 { + let bytesRead = try reader.readContent(into: buffer) + data.append(contentsOf: UnsafeRawBufferPointer(buffer)[0..