From a9a0ec77450056b570d8635863619685bff581d4 Mon Sep 17 00:00:00 2001 From: Spavid04 <-> Date: Fri, 9 Jun 2023 00:26:41 +0300 Subject: [PATCH 1/2] added file slicing support --- README.md | 18 +++++++++++ src/concatfs.c | 87 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 96 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 176a936..ed17a38 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,21 @@ +### What's changed in this fork + +I added "range" expressions to files in -concat- files. This way, you can slice an input file (and even reorder it!). \ +The new line format is `path:offset:length`, with `offset` and `length` being optional to ensure backwards compatibility. \ +Do remember that the same file can be specified multiple times (even in the original concatfs), and it will get concatenated multiple times, as expected. \ +The order in which the new file is created is the same as in the -concat- file. + +Some examples: +``` +file1.bin // use the entire file +file1.bin:10 // use file1.bin again, but starting at offset 10 +file2.bin:50:100 // use file2.bin, but only 100 bytes starting at offset 50 (inclusive). +file3.bin::1 // use file3.bin, but only the very first byte +file1.bin // use file1.bin in its entirety again +``` + +### Original readme + FUSE: Filesystem in Userspace for easy file concatenation of big files Files with the string "-concat-" anywhere in the filename are considered diff --git a/src/concatfs.c b/src/concatfs.c index ae50539..4db70af 100644 --- a/src/concatfs.c +++ b/src/concatfs.c @@ -44,6 +44,17 @@ #include #include #include +#include + +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +#define OFF_T_MAX ((((off_t)1 << (sizeof(off_t) * 8 - 2)) - 1) * 2 + 1) + +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define MAX(a, b) (((a) > (b)) ? 
// Reads the leading base-10 integer of `text` (optional whitespace and sign,
// then digits; anything after the digits is ignored).
// Returns false and leaves *out untouched when `text` does not start with a
// number. Values that do not fit are saturated by strtoll() rather than
// overflowing.
static bool parse_slice_number(const char* text, long long* out)
{
    char* end;
    long long value = strtoll(text, &end, 10);

    if (end == text) return false;

    *out = value;
    return true;
}

// Parses one line of a -concat- file, formatted as [path]:[start offset]:[length],
// where both [start offset] and :[length] are optional.
//
//   line        - in/out. On success it is cut down in place to just the path,
//                 so the caller can pass it straight to open(). On failure it
//                 is left unchanged.
//   startOffset - out. First byte of the slice, clamped to [0, file size - 1].
//   length      - out. Slice length, clamped to [1, file size - startOffset];
//                 defaults to "everything from startOffset to the end".
//
// Returns true when the path could be stat()ed and the file is not empty.
//
// The path is the longest prefix of the line (the whole line, or the text in
// front of one of its ':') that stat() accepts. Trying the longest candidate
// first keeps plain paths that contain ':' working and lets a range be
// appended to such paths as well.
static bool try_parse_line_offsets(char* line, off_t* startOffset, off_t* length)
{
    struct stat stbuf;
    const char* fields = NULL; // "offset[:length]" text following the path, if any
    long long size;
    long long start = 0;
    long long len;
    long long parsed;

    if (line == NULL || line[0] == '\0') return false;

    if (stat(line, &stbuf) != 0)
    {
        // The line as a whole is not a file: walk the ':' separators from
        // right to left, cutting the string there and retrying stat().
        char* cut = line + strlen(line);

        while (fields == NULL && cut > line)
        {
            --cut;
            if (*cut != ':') continue;

            *cut = '\0';
            if (stat(line, &stbuf) == 0)
            {
                fields = cut + 1; // keep the cut: line is now just the path
            }
            else
            {
                *cut = ':'; // not a path boundary, undo and keep looking
            }
        }
        if (fields == NULL) return false;
    }
    if (stbuf.st_size < 1) return false; // can't really use files with 0 size

    size = (long long)stbuf.st_size;
    len = size; // clamped below to "the rest of the file"

    if (fields != NULL)
    {
        const char* lengthField = strchr(fields, ':');

        // A missing or non-numeric field keeps its default.
        if (parse_slice_number(fields, &parsed)) start = parsed;
        if (lengthField != NULL && parse_slice_number(lengthField + 1, &parsed)) len = parsed;
    }

    // Keep the slice inside the file: at least one byte, never past the end.
    if (start < 0) start = 0;
    if (start > size - 1) start = size - 1;
    if (len < 1) len = 1;
    if (len > size - start) len = size - start;

    *startOffset = (off_t)start;
    *length = (off_t)len;
    return true;
}
open_concat_file(int fd, const char * path) while (fgets(fpath, sizeof(fpath), fp)) { char tpath[PATH_MAX]; struct chunk * c_n; + off_t startOffset = 0; + off_t length = 0; fpath[strlen(fpath) - 1] = 0; @@ -179,16 +247,15 @@ static struct concat_file * open_concat_file(int fd, const char * path) } else { snprintf(tpath, sizeof(tpath), "%s/%s",base_dir, fpath); } - if (stat(tpath, &stbuf) == 0) { - rv->fsize += stbuf.st_size; - } else { - continue; - } + + if (!try_parse_line_offsets(tpath, &startOffset, &length)) continue; + rv->fsize += length; if (fd >= 0) { c_n = (struct chunk *) calloc(sizeof(struct chunk), 1); - c_n->fsize = stbuf.st_size; + c_n->startOffset = startOffset; + c_n->fsize = length; c_n->fd = open(tpath, O_RDONLY); if (c) { @@ -266,7 +333,8 @@ static int read_concat_file(int fd, void *buf, size_t count, off_t offset) } for (; c && count > c->fsize - offset; c = c->next) { - ssize_t rv = pread(c->fd, buf, c->fsize - offset, offset); + ssize_t rv; + rv = pread(c->fd, buf, c->fsize - offset, offset + c->startOffset); if (rv == c->fsize - offset) { buf += rv; @@ -282,7 +350,8 @@ static int read_concat_file(int fd, void *buf, size_t count, off_t offset) } if (c && count > 0) { - ssize_t rv = pread(c->fd, buf, count, offset); + ssize_t rv; + rv = pread(c->fd, buf, count, offset + c->startOffset); if (rv < 0) { return -errno; From 1d133f7400b821ce2a4bafa73ecb9a737df072fa Mon Sep 17 00:00:00 2001 From: Spavid04 <-> Date: Fri, 16 Jun 2023 03:28:03 +0300 Subject: [PATCH 2/2] updated readme with slicing info, before merge --- README.md | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index ed17a38..bea7f7f 100644 --- a/README.md +++ b/README.md @@ -1,21 +1,3 @@ -### What's changed in this fork - -I added "range" expressions to files in -concat- files. This way, you can slice an input file (and even reorder it!). 
\ -The new line format is `path:offset:length`, with `offset` and `length` being optional to ensure backwards compatibility. \ -Do remember that the same file can be specified multiple times (even in the original concatfs), and it will get concatenated multiple times, as expected. \ -The order in which the new file is created is the same as in the -concat- file. - -Some examples: -``` -file1.bin // use the entire file -file1.bin:10 // use file1.bin again, but starting at offset 10 -file2.bin:50:100 // use file2.bin, but only 100 bytes starting at offset 50 (inclusive). -file3.bin::1 // use file3.bin, but only the very first byte -file1.bin // use file1.bin in its entirety again -``` - -### Original readme - FUSE: Filesystem in Userspace for easy file concatenation of big files Files with the string "-concat-" anywhere in the filename are considered @@ -46,6 +28,20 @@ file3.MTS on seperate lines. Empty lines or lines, which do not resolve to a file where a stat call succeeds, are ignored. +Simple "range" expressions are supported in -concat- files. This way, you can slice an input file (and even reorder it!). \ +The line format is `path:offset:length`, with both `offset` and `length` being optional. \ +Do remember that the same file can be specified multiple times, and it will get concatenated multiple times, as expected. \ +The order in which the new file is created is the same as in the -concat- file. + +Some examples: +``` +file1.bin // use the entire file +file1.bin:10 // use file1.bin again, but starting at offset 10 +file2.bin:50:100 // use file2.bin, but only 100 bytes starting at offset 50 (inclusive). +file3.bin::1 // use file3.bin, but only the very first byte +file1.bin // use file1.bin in its entirety again +``` + You will need to install libfuse-dev to compile: ```