From 018602c7e40e8f8096af686254964e05eb7e561a Mon Sep 17 00:00:00 2001 From: martinRenou Date: Tue, 14 Oct 2025 11:18:54 +0200 Subject: [PATCH 1/4] Resolve symlinks content --- unpack.c | 90 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 85 insertions(+), 5 deletions(-) diff --git a/unpack.c b/unpack.c index 8756e91..618116a 100644 --- a/unpack.c +++ b/unpack.c @@ -41,6 +41,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { FileData* files = NULL; size_t files_count = 0; const char *error_message; + bool hasSymLinks = false; ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); if (!result) { @@ -57,13 +58,20 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { archive_read_support_format_all(archive); if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) { - return error_handler(result,archive_error_string(archive), archive); + return error_handler(result,archive_error_string(archive), archive); } files = malloc(sizeof(FileData) * files_struct_length); while (archive_read_next_header(archive, &entry) == ARCHIVE_OK) { const char* filename = archive_entry_pathname(entry); size_t entrySize = archive_entry_size(entry); + + // Ignore symbolic links for now + if (archive_entry_filetype(entry) == AE_IFLNK) { + hasSymLinks = true; + continue; + } + if (files_count + 1 > files_struct_length) { files_struct_length *= 2; // double the length FileData* oldfiles = files; @@ -73,8 +81,9 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed. 
error_message = "Memory allocation error for file data."; return error_handler(result, error_message, archive); - } + } } + files[files_count].filename = strdup(filename); files[files_count].data = malloc(entrySize); files[files_count].data_size = entrySize; @@ -105,6 +114,77 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { files_count++; } + // Resolve symlinks + if (hasSymLinks) { + // Rewind and reopen the archive to iterate over symlinks + archive_read_free(archive); + archive = archive_read_new(); + archive_read_support_filter_all(archive); + archive_read_support_format_all(archive); + + if (archive_read_open_memory(archive, inputData, inputSize) != ARCHIVE_OK) { + return error_handler(result, archive_error_string(archive), archive); + } + + struct archive_entry *symlink_entry; + while (archive_read_next_header(archive, &symlink_entry) == ARCHIVE_OK) { + // Process only symlinks this time + if (archive_entry_filetype(symlink_entry) != AE_IFLNK) { + continue; + } + + const char *linkname = archive_entry_pathname(symlink_entry); + const char *target = archive_entry_symlink(symlink_entry); + + // Target not found + if (!target) { + continue; + } + + // Find the target file in the already populated files[] + size_t target_index = (size_t)-1; + for (size_t i = 0; i < files_count; i++) { + if (strcmp(files[i].filename, target) == 0) { + target_index = i; + break; + } + } + + // Target not found in the processed files + if (target_index == (size_t)-1 || !files[target_index].data) { + continue; + } + + // Add the symlink entry + if (files_count + 1 > files_struct_length) { + files_struct_length *= 2; + FileData *oldfiles = files; + files = realloc(files, sizeof(FileData) * files_struct_length); + if (!files) { + result->fileCount = files_count; + result->files = oldfiles; + error_message = "Memory allocation error for symlink data."; + return error_handler(result, error_message, archive); + } + } + + files[files_count].filename = 
strdup(linkname); + files[files_count].data_size = files[target_index].data_size; + files[files_count].data = malloc(files[target_index].data_size); + if (!files[files_count].data) { + free(files[files_count].filename); + files[files_count].filename = NULL; + result->fileCount = files_count; + result->files = files; + error_message = "Memory allocation error for symlink target data."; + return error_handler(result, error_message, archive); + } + memcpy(files[files_count].data, files[target_index].data, files[target_index].data_size); + + files_count++; + } + } + archive_read_free(archive); result->files = files; result->fileCount = files_count; @@ -150,7 +230,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { const size_t buffsize = 64 * 1024; char buff[buffsize]; - size_t total_size = 0; + size_t total_size = 0; const char *error_message; FileData* files = malloc(sizeof(FileData) * (files_count + 1)); @@ -159,7 +239,7 @@ ExtractedArchive* decompression(uint8_t* inputData, size_t inputSize) { printf("Failed to allocate memory for files array\n"); return NULL; } - + ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); if (!result) { free(files); @@ -259,4 +339,4 @@ void free_extracted_archive(ExtractedArchive* archive) { } free(archive->files); free(archive); -} \ No newline at end of file +} From 19369a351c864717a992d04dc01bff118a8c83b3 Mon Sep 17 00:00:00 2001 From: martinRenou Date: Tue, 14 Oct 2025 15:01:05 +0200 Subject: [PATCH 2/4] Recursive resolve of symlinks --- unpack.c | 124 +++++++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 92 insertions(+), 32 deletions(-) diff --git a/unpack.c b/unpack.c index 618116a..126c845 100644 --- a/unpack.c +++ b/unpack.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -19,6 +20,11 @@ typedef struct { char error_message[256]; } ExtractedArchive; +typedef struct { + char *linkname; + char *target; +} SymlinkInfo; + ExtractedArchive* 
error_handler(ExtractedArchive* result, const char *error_message, struct archive* archive) { if (!result || !archive) { @@ -33,6 +39,42 @@ ExtractedArchive* error_handler(ExtractedArchive* result, const char *error_mess return result; } +static char* join_paths(const char *dir, const char *relative) { /* Returns a heap-allocated "dir/relative" (or a copy of relative when dir is NULL or empty); NULL if allocation fails. Caller frees. */ + if (!dir || !*dir) return strdup(relative); + size_t len = strlen(dir) + 1 + strlen(relative) + 1; + char *buf = malloc(len); + if (buf) snprintf(buf, len, "%s/%s", dir, relative); /* malloc may fail: skip formatting and return NULL instead of writing through a null pointer */ + return buf; +} + +static const FileData *resolve_target_recursive(const FileData *files, size_t file_count, + const SymlinkInfo *symlinks, size_t symlink_count, + const char *target, int depth) +{ + if (!target || depth > 32) // prevent infinite recursion + return NULL; + + // First, check if target is a regular file + for (size_t i = 0; i < file_count; i++) { + if (strcmp(files[i].filename, target) == 0) { + if (files[i].data && files[i].data_size > 0) { + return &files[i]; // Found real file + } + } + } + + // If not found among files, maybe it's another symlink + for (size_t i = 0; i < symlink_count; i++) { + if (strcmp(symlinks[i].linkname, target) == 0) { + // Recurse into that symlink's target + return resolve_target_recursive(files, file_count, symlinks, symlink_count, + symlinks[i].target, depth + 1); + } + } + + return NULL; // Not found +} + EMSCRIPTEN_KEEPALIVE ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { struct archive* archive; @@ -40,7 +82,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { size_t files_struct_length = 100; FileData* files = NULL; size_t files_count = 0; - const char *error_message; + const char* error_message; bool hasSymLinks = false; ExtractedArchive* result = (ExtractedArchive*)malloc(sizeof(ExtractedArchive)); @@ -75,7 +117,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { if (files_count + 1 > files_struct_length) { files_struct_length *= 2; // double the length FileData* 
oldfiles = files; - files= realloc(files, sizeof(FileData) * files_struct_length); + files = realloc(files, sizeof(FileData) * files_struct_length); if (!files) { result->fileCount = files_count; result->files = oldfiles; // otherwise memory is lost, alternatively also everything can be freed. @@ -116,7 +158,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { // Resolve symlinks if (hasSymLinks) { - // Rewind and reopen the archive to iterate over symlinks + // Reopen the archive to iterate over symlinks archive_read_free(archive); archive = archive_read_new(); archive_read_support_filter_all(archive); @@ -127,35 +169,54 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { } struct archive_entry *symlink_entry; + + size_t symlink_count = 0; + size_t symlink_alloc = 16; + SymlinkInfo *symlinks = malloc(sizeof(SymlinkInfo) * symlink_alloc); + + // Collect all symlink entries while (archive_read_next_header(archive, &symlink_entry) == ARCHIVE_OK) { - // Process only symlinks this time - if (archive_entry_filetype(symlink_entry) != AE_IFLNK) { + if (archive_entry_filetype(symlink_entry) != AE_IFLNK) continue; - } - const char *linkname = archive_entry_pathname(symlink_entry); - const char *target = archive_entry_symlink(symlink_entry); + const char *tgt = archive_entry_symlink(symlink_entry); - // Target not found - if (!target) { + if (!tgt) { continue; } - // Find the target file in the already populated files[] - size_t target_index = (size_t)-1; - for (size_t i = 0; i < files_count; i++) { - if (strcmp(files[i].filename, target) == 0) { - target_index = i; - break; - } + if (symlink_count + 1 > symlink_alloc) { + symlink_alloc *= 2; + symlinks = realloc(symlinks, sizeof(SymlinkInfo) * symlink_alloc); } - // Target not found in the processed files - if (target_index == (size_t)-1 || !files[target_index].data) { - continue; + // Compute directory of the symlink + char *link_dir = 
strdup(archive_entry_pathname(symlink_entry)); + char *dir = dirname(link_dir); + char *resolved_target_path = join_paths(dir, tgt); + /* dirname() may return a pointer into link_dir or to static storage, so its result must never be passed to free(). */ + free(link_dir); + + symlinks[symlink_count].linkname = strdup(archive_entry_pathname(symlink_entry)); + symlinks[symlink_count].target = resolved_target_path; /* take ownership of the joined path; duplicating it leaked the original buffer */ + symlink_count++; + } + + // Resolve and populate symlinks + for (size_t i = 0; i < symlink_count; i++) { + const char *linkname = symlinks[i].linkname; + const char *target = symlinks[i].target; + + const FileData *resolved = resolve_target_recursive(files, files_count, + symlinks, symlink_count, + target, 0); + + if (!resolved) { + // error_message = "Failed to resolve symlink."; + error_message = target; + return error_handler(result, error_message, archive); } - // Add the symlink entry if (files_count + 1 > files_struct_length) { files_struct_length *= 2; FileData *oldfiles = files; @@ -169,20 +230,19 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { } files[files_count].filename = strdup(linkname); - files[files_count].data_size = files[target_index].data_size; - files[files_count].data = malloc(files[target_index].data_size); - if (!files[files_count].data) { - free(files[files_count].filename); - files[files_count].filename = NULL; - result->fileCount = files_count; - result->files = files; - error_message = "Memory allocation error for symlink target data."; - return error_handler(result, error_message, archive); - } - memcpy(files[files_count].data, files[target_index].data, files[target_index].data_size); + + files[files_count].data_size = resolved->data_size; + files[files_count].data = malloc(resolved->data_size); + memcpy(files[files_count].data, resolved->data, resolved->data_size); files_count++; } + + for (size_t i = 0; i < symlink_count; i++) { + free(symlinks[i].linkname); + free(symlinks[i].target); + } + free(symlinks); } archive_read_free(archive); From e08bcbe0c9999b2ea24e080e9d598dc3a1c9745e Mon Sep 17 
00:00:00 2001 From: martinRenou Date: Tue, 14 Oct 2025 15:12:26 +0200 Subject: [PATCH 3/4] Rename --- unpack.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/unpack.c b/unpack.c index 126c845..d0de905 100644 --- a/unpack.c +++ b/unpack.c @@ -47,10 +47,11 @@ static char* join_paths(const char *dir, const char *relative) { return buf; } -static const FileData *resolve_target_recursive(const FileData *files, size_t file_count, - const SymlinkInfo *symlinks, size_t symlink_count, - const char *target, int depth) -{ +static const FileData *resolve_symlink( + const FileData *files, size_t file_count, + const SymlinkInfo *symlinks, size_t symlink_count, + const char *target, int depth +) { if (!target || depth > 32) // prevent infinite recursion return NULL; @@ -67,7 +68,7 @@ static const FileData *resolve_target_recursive(const FileData *files, size_t fi for (size_t i = 0; i < symlink_count; i++) { if (strcmp(symlinks[i].linkname, target) == 0) { // Recurse into that symlink's target - return resolve_target_recursive(files, file_count, symlinks, symlink_count, + return resolve_symlink(files, file_count, symlinks, symlink_count, symlinks[i].target, depth + 1); } } @@ -207,7 +208,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { const char *linkname = symlinks[i].linkname; const char *target = symlinks[i].target; - const FileData *resolved = resolve_target_recursive(files, files_count, + const FileData *resolved = resolve_symlink(files, files_count, symlinks, symlink_count, target, 0); From 87b52f47d260e0b2d7a0ca7ca941f04c7c227aaf Mon Sep 17 00:00:00 2001 From: martinRenou Date: Tue, 14 Oct 2025 15:18:07 +0200 Subject: [PATCH 4/4] Iterate --- unpack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/unpack.c b/unpack.c index d0de905..26401ed 100644 --- a/unpack.c +++ b/unpack.c @@ -213,8 +213,7 @@ ExtractedArchive* extract_archive(uint8_t* inputData, size_t inputSize ) { target, 0); if 
(!resolved) { - // error_message = "Failed to resolve symlink."; - error_message = target; + error_message = "Failed to resolve symlink."; return error_handler(result, error_message, archive); }