From e4aba95c87af622b1de7ce07038ea1683cfd9a27 Mon Sep 17 00:00:00 2001 From: Gasper Tomazic Date: Mon, 9 Sep 2024 15:25:48 +0200 Subject: [PATCH 1/5] Interface for object that keeps (de)compress context alive for multiple (de)compressions --- ext/zstdruby/dictionary_compress.c | 122 ++++++++++++++++++++++ ext/zstdruby/dictionary_decompress.c | 151 +++++++++++++++++++++++++++ ext/zstdruby/main.c | 4 + lib/zstd-ruby/version.rb | 2 +- 4 files changed, 278 insertions(+), 1 deletion(-) create mode 100644 ext/zstdruby/dictionary_compress.c create mode 100644 ext/zstdruby/dictionary_decompress.c diff --git a/ext/zstdruby/dictionary_compress.c b/ext/zstdruby/dictionary_compress.c new file mode 100644 index 0000000..7a5b479 --- /dev/null +++ b/ext/zstdruby/dictionary_compress.c @@ -0,0 +1,122 @@ +#include "common.h" + +struct dictionary_compress_t { + ZSTD_CCtx* ctx; +}; + +static void +dictionary_compress_mark(void *p) +{ + struct dictionary_compress_t *dc = p; +} + +static void +dictionary_compress_free(void *p) +{ + struct dictionary_compress_t *dc = p; + + ZSTD_CCtx* ctx = dc->ctx; + if (ctx != NULL) { + ZSTD_freeCCtx(ctx); + } + + xfree(dc); +} + +static size_t +dictionary_compress_memsize(const void *p) +{ + return sizeof(struct dictionary_compress_t); +} + +#ifdef HAVE_RB_GC_MARK_MOVABLE +static void +dictionary_compress_compact(void *p) +{ + struct dictionary_compress_t *dc = p; +} +#endif + +static const rb_data_type_t dictionary_compress_type = { + "dictionary_compress", + { + dictionary_compress_mark, + dictionary_compress_free, + dictionary_compress_memsize, +#ifdef HAVE_RB_GC_MARK_MOVABLE + dictionary_compress_compact, +#endif + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY +}; + + +static VALUE +rb_dictionary_compress_allocate(VALUE klass) +{ + struct dictionary_compress_t* dc; + VALUE obj = TypedData_Make_Struct(klass, struct dictionary_compress_t, &dictionary_compress_type, dc); + dc->ctx = NULL; + return obj; +} + + +static VALUE 
+rb_dictionary_compress_initialize(int argc, VALUE *argv, VALUE obj) +{ + VALUE kwargs; + VALUE compression_level_value; + rb_scan_args(argc, argv, "00:", &kwargs); + + struct dictionary_compress_t* dc; + TypedData_Get_Struct(obj, struct dictionary_compress_t, &dictionary_compress_type, dc); + + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + if (ctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); + } + + dc->ctx = ctx; + + compression_level_value = Qnil; + + set_compress_params(ctx, compression_level_value, kwargs); + + return obj; +} + + +static VALUE +rb_dictionary_compress_compress(VALUE obj, VALUE input_value) +{ + + struct dictionary_compress_t* dc; + TypedData_Get_Struct(obj, struct dictionary_compress_t, &dictionary_compress_type, dc); + + StringValue(input_value); + char* input_data = RSTRING_PTR(input_value); + size_t input_size = RSTRING_LEN(input_value); + + size_t max_compressed_size = ZSTD_compressBound(input_size); + VALUE output = rb_str_new(NULL, max_compressed_size); + char* output_data = RSTRING_PTR(output); + + size_t const ret = zstd_compress(dc->ctx, output_data, max_compressed_size, input_data, input_size, false); + if (ZSTD_isError(ret)) { + rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); + } + rb_str_resize(output, ret); + + return output; +} + + +extern VALUE rb_mZstd, cDictionaryCompress; +void +zstd_ruby_dictionary_compress_init(void) +{ + VALUE cDictionaryCompress = rb_define_class_under(rb_mZstd, "DictionaryCompress", rb_cObject); + rb_define_alloc_func(cDictionaryCompress, rb_dictionary_compress_allocate); + rb_define_method(cDictionaryCompress, "initialize", rb_dictionary_compress_initialize, -1); + rb_define_method(cDictionaryCompress, "compress", rb_dictionary_compress_compress, 1); +} diff --git a/ext/zstdruby/dictionary_decompress.c b/ext/zstdruby/dictionary_decompress.c new file mode 100644 index 0000000..c9a21ea --- /dev/null +++ b/ext/zstdruby/dictionary_decompress.c @@ 
-0,0 +1,151 @@ +#include "common.h" + +struct dictionary_decompress_t { + ZSTD_DCtx* ctx; +}; + +static void +dictionary_decompress_mark(void *p) +{ + struct dictionary_decompress_t *dd = p; +} + +static void +dictionary_decompress_free(void *p) +{ + struct dictionary_decompress_t *dd = p; + + ZSTD_DCtx* ctx = dd->ctx; + if (ctx != NULL) { + ZSTD_freeDCtx(ctx); + } + + xfree(dd); +} + +static size_t +dictionary_decompress_memsize(const void *p) +{ + return sizeof(struct dictionary_decompress_t); +} + +#ifdef HAVE_RB_GC_MARK_MOVABLE +static void +dictionary_decompress_compact(void *p) +{ + struct dictionary_decompress_t *dd = p; +} +#endif + +static const rb_data_type_t dictionary_decompress_type = { + "dictionary_decompress", + { + dictionary_decompress_mark, + dictionary_decompress_free, + dictionary_decompress_memsize, +#ifdef HAVE_RB_GC_MARK_MOVABLE + dictionary_decompress_compact, +#endif + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY +}; + + +static VALUE +rb_dictionary_decompress_allocate(VALUE klass) +{ + struct dictionary_decompress_t* dd; + VALUE obj = TypedData_Make_Struct(klass, struct dictionary_decompress_t, &dictionary_decompress_type, dd); + dd->ctx = NULL; + return obj; +} + + +static VALUE +rb_dictionary_decompress_initialize(int argc, VALUE *argv, VALUE obj) +{ + VALUE kwargs; + rb_scan_args(argc, argv, "00:", &kwargs); + + struct dictionary_decompress_t* dd; + TypedData_Get_Struct(obj, struct dictionary_decompress_t, &dictionary_decompress_type, dd); + + ZSTD_DCtx* const ctx = ZSTD_createDCtx(); + if (ctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx error"); + } + + dd->ctx = ctx; + + set_decompress_params(ctx, kwargs); + + return obj; +} + + +static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t input_size) +{ + ZSTD_inBuffer input = { input_data, input_size, 0 }; + VALUE result = rb_str_new(0, 0); + + while (input.pos < input.size) { + ZSTD_outBuffer output = { NULL, 0, 0 }; + output.size += 
ZSTD_DStreamOutSize(); + VALUE output_string = rb_str_new(NULL, output.size); + output.dst = RSTRING_PTR(output_string); + + size_t ret = zstd_stream_decompress(dctx, &output, &input, false); + if (ZSTD_isError(ret)) { + ZSTD_freeDCtx(dctx); + rb_raise(rb_eRuntimeError, "%s: %s", "ZSTD_decompressStream failed", ZSTD_getErrorName(ret)); + } + rb_str_cat(result, output.dst, output.pos); + RB_GC_GUARD(output_string); + } + ZSTD_freeDCtx(dctx); + return result; +} + + +static VALUE +rb_dictionary_decompress_decompress(VALUE obj, VALUE input_value) +{ + + struct dictionary_decompress_t* dd; + TypedData_Get_Struct(obj, struct dictionary_decompress_t, &dictionary_decompress_type, dd); + + StringValue(input_value); + char* input_data = RSTRING_PTR(input_value); + size_t input_size = RSTRING_LEN(input_value); + + unsigned long long const uncompressed_size = ZSTD_getFrameContentSize(input_data, input_size); + if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) { + rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size)); + } + // ZSTD_decompressStream may be called multiple times when ZSTD_CONTENTSIZE_UNKNOWN, causing slowness. 
+ // Therefore, we will not standardize on ZSTD_decompressStream + if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { + return decompress_buffered(dd->ctx, input_data, input_size); + } + + VALUE output = rb_str_new(NULL, uncompressed_size); + char* output_data = RSTRING_PTR(output); + + size_t const decompress_size = zstd_decompress(dd->ctx, output_data, uncompressed_size, input_data, input_size, false); + if (ZSTD_isError(decompress_size)) { + rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size)); + } + + return output; +} + + +extern VALUE rb_mZstd, cDictionaryDecompress; +void +zstd_ruby_dictionary_decompress_init(void) +{ + VALUE cDictionaryDecompress = rb_define_class_under(rb_mZstd, "DictionaryDecompress", rb_cObject); + rb_define_alloc_func(cDictionaryDecompress, rb_dictionary_decompress_allocate); + rb_define_method(cDictionaryDecompress, "initialize", rb_dictionary_decompress_initialize, -1); + rb_define_method(cDictionaryDecompress, "decompress", rb_dictionary_decompress_decompress, 1); +} diff --git a/ext/zstdruby/main.c b/ext/zstdruby/main.c index 0f2198b..0c2dad3 100644 --- a/ext/zstdruby/main.c +++ b/ext/zstdruby/main.c @@ -5,6 +5,8 @@ void zstd_ruby_init(void); void zstd_ruby_skippable_frame_init(void); void zstd_ruby_streaming_compress_init(void); void zstd_ruby_streaming_decompress_init(void); +void zstd_ruby_dictionary_compress_init(void); +void zstd_ruby_dictionary_decompress_init(void); void Init_zstdruby(void) @@ -18,4 +20,6 @@ Init_zstdruby(void) zstd_ruby_skippable_frame_init(); zstd_ruby_streaming_compress_init(); zstd_ruby_streaming_decompress_init(); + zstd_ruby_dictionary_compress_init(); + zstd_ruby_dictionary_decompress_init(); } diff --git a/lib/zstd-ruby/version.rb b/lib/zstd-ruby/version.rb index 614329e..c588151 100644 --- a/lib/zstd-ruby/version.rb +++ b/lib/zstd-ruby/version.rb @@ -1,3 +1,3 @@ module Zstd - VERSION = "1.5.6.6" + VERSION = "1.5.7.0" end From 
779a677f9c90a519af658e095d5710cfdbc571af Mon Sep 17 00:00:00 2001 From: Gasper Tomazic Date: Mon, 9 Sep 2024 15:34:31 +0200 Subject: [PATCH 2/5] Merge dictionary (de)compress into single object and rename it to SimpleCompress --- ext/zstdruby/dictionary_compress.c | 122 ------------------- ext/zstdruby/simple_compress.c | 187 +++++++++++++++++++++++++++++ 2 files changed, 187 insertions(+), 122 deletions(-) delete mode 100644 ext/zstdruby/dictionary_compress.c create mode 100644 ext/zstdruby/simple_compress.c diff --git a/ext/zstdruby/dictionary_compress.c b/ext/zstdruby/dictionary_compress.c deleted file mode 100644 index 7a5b479..0000000 --- a/ext/zstdruby/dictionary_compress.c +++ /dev/null @@ -1,122 +0,0 @@ -#include "common.h" - -struct dictionary_compress_t { - ZSTD_CCtx* ctx; -}; - -static void -dictionary_compress_mark(void *p) -{ - struct dictionary_compress_t *dc = p; -} - -static void -dictionary_compress_free(void *p) -{ - struct dictionary_compress_t *dc = p; - - ZSTD_CCtx* ctx = dc->ctx; - if (ctx != NULL) { - ZSTD_freeCCtx(ctx); - } - - xfree(dc); -} - -static size_t -dictionary_compress_memsize(const void *p) -{ - return sizeof(struct dictionary_compress_t); -} - -#ifdef HAVE_RB_GC_MARK_MOVABLE -static void -dictionary_compress_compact(void *p) -{ - struct dictionary_compress_t *dc = p; -} -#endif - -static const rb_data_type_t dictionary_compress_type = { - "dictionary_compress", - { - dictionary_compress_mark, - dictionary_compress_free, - dictionary_compress_memsize, -#ifdef HAVE_RB_GC_MARK_MOVABLE - dictionary_compress_compact, -#endif - }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY -}; - - -static VALUE -rb_dictionary_compress_allocate(VALUE klass) -{ - struct dictionary_compress_t* dc; - VALUE obj = TypedData_Make_Struct(klass, struct dictionary_compress_t, &dictionary_compress_type, dc); - dc->ctx = NULL; - return obj; -} - - -static VALUE -rb_dictionary_compress_initialize(int argc, VALUE *argv, VALUE obj) -{ - VALUE kwargs; - VALUE 
compression_level_value; - rb_scan_args(argc, argv, "00:", &kwargs); - - struct dictionary_compress_t* dc; - TypedData_Get_Struct(obj, struct dictionary_compress_t, &dictionary_compress_type, dc); - - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - if (ctx == NULL) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); - } - - dc->ctx = ctx; - - compression_level_value = Qnil; - - set_compress_params(ctx, compression_level_value, kwargs); - - return obj; -} - - -static VALUE -rb_dictionary_compress_compress(VALUE obj, VALUE input_value) -{ - - struct dictionary_compress_t* dc; - TypedData_Get_Struct(obj, struct dictionary_compress_t, &dictionary_compress_type, dc); - - StringValue(input_value); - char* input_data = RSTRING_PTR(input_value); - size_t input_size = RSTRING_LEN(input_value); - - size_t max_compressed_size = ZSTD_compressBound(input_size); - VALUE output = rb_str_new(NULL, max_compressed_size); - char* output_data = RSTRING_PTR(output); - - size_t const ret = zstd_compress(dc->ctx, output_data, max_compressed_size, input_data, input_size, false); - if (ZSTD_isError(ret)) { - rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); - } - rb_str_resize(output, ret); - - return output; -} - - -extern VALUE rb_mZstd, cDictionaryCompress; -void -zstd_ruby_dictionary_compress_init(void) -{ - VALUE cDictionaryCompress = rb_define_class_under(rb_mZstd, "DictionaryCompress", rb_cObject); - rb_define_alloc_func(cDictionaryCompress, rb_dictionary_compress_allocate); - rb_define_method(cDictionaryCompress, "initialize", rb_dictionary_compress_initialize, -1); - rb_define_method(cDictionaryCompress, "compress", rb_dictionary_compress_compress, 1); -} diff --git a/ext/zstdruby/simple_compress.c b/ext/zstdruby/simple_compress.c new file mode 100644 index 0000000..c622fc2 --- /dev/null +++ b/ext/zstdruby/simple_compress.c @@ -0,0 +1,187 @@ +#include "common.h" + +struct simple_compress_t { + ZSTD_CCtx* cctx; + ZSTD_CCtx* dctx; +}; + 
+static void +simple_compress_mark(void *p) +{ + struct simple_compress_t *sc = p; +} + +static void +simple_compress_free(void *p) +{ + struct simple_compress_t *sc = p; + + ZSTD_CCtx* cctx = sc->cctx; + if (ctx != NULL) { + ZSTD_freeCCtx(cctx); + } + + ZSTD_DCtx* dctx = sc->dctx; + if (dctx != NULL) { + ZSTD_freeCCtx(dctx); + } + + xfree(sc); +} + +static size_t +simple_compress_memsize(const void *p) +{ + return sizeof(struct simple_compress_t); +} + +#ifdef HAVE_RB_GC_MARK_MOVABLE +static void +simple_compress_compact(void *p) +{ + struct simple_compress_t *sc = p; +} +#endif + +static const rb_data_type_t simple_compress_type = { + "simple_compress", + { + simple_compress_mark, + simple_compress_free, + simple_compress_memsize, +#ifdef HAVE_RB_GC_MARK_MOVABLE + simple_compress_compact, +#endif + }, + 0, 0, RUBY_TYPED_FREE_IMMEDIATELY +}; + + +static VALUE +rb_simple_compress_allocate(VALUE klass) +{ + struct simple_compress_t* sc; + VALUE obj = TypedData_Make_Struct(klass, struct simple_compress_t, &simple_compress_type, sc); + sc->cctx = NULL; + sc->dctx = NULL; + return obj; +} + + +static VALUE +rb_simple_compress_initialize(int argc, VALUE *argv, VALUE obj) +{ + VALUE kwargs; + VALUE compression_level_value; + rb_scan_args(argc, argv, "00:", &kwargs); + + struct simple_compress_t* sc; + TypedData_Get_Struct(obj, struct simple_compress_t, &simple_compress_type, sc); + + ZSTD_CCtx* const ctx = ZSTD_createCCtx(); + if (ctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createCCtx error"); + } + + sc->ctx = ctx; + + compression_level_value = Qnil; + + set_compress_params(ctx, compression_level_value, kwargs); + + return obj; +} + + +static VALUE +rb_simple_compress_compress(VALUE obj, VALUE input_value) +{ + + struct simple_compress_t* sc; + TypedData_Get_Struct(obj, struct simple_compress_t, &simple_compress_type, sc); + + StringValue(input_value); + char* input_data = RSTRING_PTR(input_value); + size_t input_size = RSTRING_LEN(input_value); + + size_t 
max_compressed_size = ZSTD_compressBound(input_size); + VALUE output = rb_str_new(NULL, max_compressed_size); + char* output_data = RSTRING_PTR(output); + + size_t const ret = zstd_compress(sc->cctx, output_data, max_compressed_size, input_data, input_size, false); + if (ZSTD_isError(ret)) { + rb_raise(rb_eRuntimeError, "compress error error code: %s", ZSTD_getErrorName(ret)); + } + rb_str_resize(output, ret); + + return output; +} + + +static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t input_size) +{ + ZSTD_inBuffer input = { input_data, input_size, 0 }; + VALUE result = rb_str_new(0, 0); + + while (input.pos < input.size) { + ZSTD_outBuffer output = { NULL, 0, 0 }; + output.size += ZSTD_DStreamOutSize(); + VALUE output_string = rb_str_new(NULL, output.size); + output.dst = RSTRING_PTR(output_string); + + size_t ret = zstd_stream_decompress(dctx, &output, &input, false); + if (ZSTD_isError(ret)) { + ZSTD_freeDCtx(dctx); + rb_raise(rb_eRuntimeError, "%s: %s", "ZSTD_decompressStream failed", ZSTD_getErrorName(ret)); + } + rb_str_cat(result, output.dst, output.pos); + RB_GC_GUARD(output_string); + } + ZSTD_freeDCtx(dctx); + return result; +} + + +static VALUE +rb_simple_compress_decompress(VALUE obj, VALUE input_value) +{ + + struct simple_compress_t* sc; + TypedData_Get_Struct(obj, struct simple_compress_t, &simple_compress_type, sc); + + StringValue(input_value); + char* input_data = RSTRING_PTR(input_value); + size_t input_size = RSTRING_LEN(input_value); + + unsigned long long const uncompressed_size = ZSTD_getFrameContentSize(input_data, input_size); + if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) { + rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size)); + } + // ZSTD_decompressStream may be called multiple times when ZSTD_CONTENTSIZE_UNKNOWN, causing slowness. 
+ // Therefore, we will not standardize on ZSTD_decompressStream + if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { + return decompress_buffered(sc->dctx, input_data, input_size); + } + + VALUE output = rb_str_new(NULL, uncompressed_size); + char* output_data = RSTRING_PTR(output); + + size_t const decompress_size = zstd_decompress(sc->dctx, output_data, uncompressed_size, input_data, input_size, false); + if (ZSTD_isError(decompress_size)) { + rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size)); + } + + return output; +} + + +extern VALUE rb_mZstd, cSimpleCompress; +void +zstd_ruby_simple_compress_init(void) +{ + VALUE cSimpleCompress = rb_define_class_under(rb_mZstd, "SimpleCompress", rb_cObject); + rb_define_alloc_func(cSimpleCompress, rb_simple_compress_allocate); + rb_define_method(cSimpleCompress, "initialize", rb_simple_compress_initialize, -1); + rb_define_method(cSimpleCompress, "compress", rb_simple_compress_compress, 1); + rb_define_method(cSimpleCompress, "decompress", rb_simple_compress_decompress, 1); +} From 28e59322e172af3de9a579299aacc56bb85e3430 Mon Sep 17 00:00:00 2001 From: Gasper Tomazic Date: Tue, 10 Sep 2024 10:41:38 +0200 Subject: [PATCH 3/5] Fix parsing SimpleCompress kwargs --- README.md | 8 ++ ext/zstdruby/main.c | 6 +- ext/zstdruby/simple_compress.c | 54 +++++++++--- lib/zstd-ruby/version.rb | 2 +- spec/zstd-ruby-simple_compress_spec.rb | 112 +++++++++++++++++++++++++ 5 files changed, 166 insertions(+), 16 deletions(-) create mode 100644 spec/zstd-ruby-simple_compress_spec.rb diff --git a/README.md b/README.md index 8058701..77b64d8 100644 --- a/README.md +++ b/README.md @@ -49,6 +49,14 @@ compressed_data = Zstd.compress(data, level: complession_level) # default compre compressed_using_dict = Zstd.compress("", dict: File.read('dictionary_file')) ``` +#### Keep dictionary loaded +```ruby +de_compressor = Zstd::SimpleCompress.new(dict: File.read('dictionary_file')) + +compressed_string = 
de_compressor.compress("abc") +decompressed_string = de_compressor.decompress(compressed_string) +``` + #### Streaming Compression ```ruby stream = Zstd::StreamingCompress.new diff --git a/ext/zstdruby/main.c b/ext/zstdruby/main.c index 0c2dad3..3c650bd 100644 --- a/ext/zstdruby/main.c +++ b/ext/zstdruby/main.c @@ -5,8 +5,7 @@ void zstd_ruby_init(void); void zstd_ruby_skippable_frame_init(void); void zstd_ruby_streaming_compress_init(void); void zstd_ruby_streaming_decompress_init(void); -void zstd_ruby_dictionary_compress_init(void); -void zstd_ruby_dictionary_decompress_init(void); +void zstd_ruby_simple_compress_init(void); void Init_zstdruby(void) @@ -20,6 +19,5 @@ Init_zstdruby(void) zstd_ruby_skippable_frame_init(); zstd_ruby_streaming_compress_init(); zstd_ruby_streaming_decompress_init(); - zstd_ruby_dictionary_compress_init(); - zstd_ruby_dictionary_decompress_init(); + zstd_ruby_simple_compress_init(); } diff --git a/ext/zstdruby/simple_compress.c b/ext/zstdruby/simple_compress.c index c622fc2..631d336 100644 --- a/ext/zstdruby/simple_compress.c +++ b/ext/zstdruby/simple_compress.c @@ -2,7 +2,7 @@ struct simple_compress_t { ZSTD_CCtx* cctx; - ZSTD_CCtx* dctx; + ZSTD_DCtx* dctx; }; static void @@ -17,13 +17,13 @@ simple_compress_free(void *p) struct simple_compress_t *sc = p; ZSTD_CCtx* cctx = sc->cctx; - if (ctx != NULL) { + if (cctx != NULL) { ZSTD_freeCCtx(cctx); } ZSTD_DCtx* dctx = sc->dctx; if (dctx != NULL) { - ZSTD_freeCCtx(dctx); + ZSTD_freeDCtx(dctx); } xfree(sc); @@ -72,22 +72,56 @@ static VALUE rb_simple_compress_initialize(int argc, VALUE *argv, VALUE obj) { VALUE kwargs; - VALUE compression_level_value; rb_scan_args(argc, argv, "00:", &kwargs); struct simple_compress_t* sc; TypedData_Get_Struct(obj, struct simple_compress_t, &simple_compress_type, sc); - ZSTD_CCtx* const ctx = ZSTD_createCCtx(); - if (ctx == NULL) { + // Build (de)compress contexts + + ZSTD_CCtx* const cctx = ZSTD_createCCtx(); + if (cctx == NULL) { rb_raise(rb_eRuntimeError, 
"%s", "ZSTD_createCCtx error"); } - sc->ctx = ctx; + sc->cctx = cctx; + + ZSTD_DCtx* const dctx = ZSTD_createDCtx(); + if (dctx == NULL) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx error"); + } + + sc->dctx = dctx; + + // Apply compression level and dictionary - compression_level_value = Qnil; + ID kwargs_keys[2]; + kwargs_keys[0] = rb_intern("level"); + kwargs_keys[1] = rb_intern("dict"); + VALUE kwargs_values[2]; + rb_get_kwargs(kwargs, kwargs_keys, 0, 2, kwargs_values); - set_compress_params(ctx, compression_level_value, kwargs); + int compression_level = ZSTD_CLEVEL_DEFAULT; + if (kwargs_values[0] != Qundef && kwargs_values[0] != Qnil) { + compression_level = convert_compression_level(kwargs_values[0]); + } + + ZSTD_CCtx_setParameter(cctx, ZSTD_c_compressionLevel, compression_level); + + if (kwargs_values[1] != Qundef && kwargs_values[1] != Qnil) { + char* dict_buffer = RSTRING_PTR(kwargs_values[1]); + size_t dict_size = RSTRING_LEN(kwargs_values[1]); + + size_t load_cdict_ret = ZSTD_CCtx_loadDictionary(cctx, dict_buffer, dict_size); + if (ZSTD_isError(load_cdict_ret)) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_CCtx_loadDictionary failed"); + } + + size_t load_ddict_ret = ZSTD_DCtx_loadDictionary(dctx, dict_buffer, dict_size); + if (ZSTD_isError(load_ddict_ret)) { + rb_raise(rb_eRuntimeError, "%s", "ZSTD_DCtx_loadDictionary failed"); + } + } return obj; } @@ -131,13 +165,11 @@ static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t size_t ret = zstd_stream_decompress(dctx, &output, &input, false); if (ZSTD_isError(ret)) { - ZSTD_freeDCtx(dctx); rb_raise(rb_eRuntimeError, "%s: %s", "ZSTD_decompressStream failed", ZSTD_getErrorName(ret)); } rb_str_cat(result, output.dst, output.pos); RB_GC_GUARD(output_string); } - ZSTD_freeDCtx(dctx); return result; } diff --git a/lib/zstd-ruby/version.rb b/lib/zstd-ruby/version.rb index c588151..614329e 100644 --- a/lib/zstd-ruby/version.rb +++ b/lib/zstd-ruby/version.rb @@ -1,3 +1,3 @@ module 
Zstd - VERSION = "1.5.7.0" + VERSION = "1.5.6.6" end diff --git a/spec/zstd-ruby-simple_compress_spec.rb b/spec/zstd-ruby-simple_compress_spec.rb new file mode 100644 index 0000000..4537c8c --- /dev/null +++ b/spec/zstd-ruby-simple_compress_spec.rb @@ -0,0 +1,112 @@ +require "spec_helper" +require 'zstd-ruby' + +# Generate dictionary methods +# https://github.com/facebook/zstd#the-case-for-small-data-compression +# https://github.com/facebook/zstd/releases/tag/v1.1.3 + +RSpec.describe Zstd::SimpleCompress do + describe 'simple (de)compress' do + let(:user_json) do + File.read("#{__dir__}/user_springmt.json") + end + + it 'should work' do + compressor = Zstd::SimpleCompress.new() + + compressed = compressor.compress(user_json) + decompressed = compressor.decompress(compressed) + + expect(compressed.length).to be < user_json.length + expect(user_json).to eq(decompressed) + end + + it 'should work with simple string' do + compressor = Zstd::SimpleCompress.new() + + compressed = compressor.compress("abc") + expect("abc").to eq(compressor.decompress(compressed)) + end + + it 'should work with blank input' do + compressor = Zstd::SimpleCompress.new() + + compressed = compressor.compress("") + expect("").to eq(compressor.decompress(compressed)) + end + + it 'should work with long strings' do + compressor = Zstd::SimpleCompress.new() + + long_string = "a" * 400_000 + compressed = compressor.compress(long_string) + expect(long_string).to eq(compressor.decompress(compressed)) + end + + it 'should support compression levels' do + compressor = Zstd::SimpleCompress.new() + compressor_l10 = Zstd::SimpleCompress.new(level: 10) + + compressed = compressor.compress(user_json) + compressed_l10 = compressor_l10.compress(user_json) + + expect(compressed_l10.length).to be < compressed.length + expect(user_json).to eq(compressor.decompress(compressed)) + expect(user_json).to eq(compressor_l10.decompress(compressed_l10)) + end + end + + describe 'compress_using_dict' do + let(:user_json) 
do + File.read("#{__dir__}/user_springmt.json") + end + let(:dictionary) do + File.read("#{__dir__}/dictionary") + end + + it 'should work' do + compressor = Zstd::SimpleCompress.new(dict: dictionary) + + compressed = compressor.compress(user_json) + decompressed = compressor.decompress(compressed) + + expect(compressed.length).to be < user_json.length + expect(user_json).to eq(decompressed) + end + + it 'should work with simple string' do + compressor = Zstd::SimpleCompress.new(dict: dictionary) + + compressed = compressor.compress("abc") + expect("abc").to eq(compressor.decompress(compressed)) + end + + it 'should work with blank input' do + compressor = Zstd::SimpleCompress.new(dict: dictionary) + + compressed = compressor.compress("") + expect("").to eq(compressor.decompress(compressed)) + end + + it 'should work with long strings' do + compressor = Zstd::SimpleCompress.new(dict: dictionary) + + long_string = "a" * 400_000 + compressed = compressor.compress(long_string) + expect(long_string).to eq(compressor.decompress(compressed)) + end + + it 'should support compression levels' do + compressor = Zstd::SimpleCompress.new(dict: dictionary) + compressor_l10 = Zstd::SimpleCompress.new(level: 10, dict: dictionary) + + compressed = compressor.compress(user_json) + compressed_l10 = compressor_l10.compress(user_json) + + expect(compressed_l10.length).to be < compressed.length + expect(user_json).to eq(compressor.decompress(compressed)) + expect(user_json).to eq(compressor_l10.decompress(compressed_l10)) + end + end + +end From 81cd9669f42a0310999c30914e08cf0b0a601158 Mon Sep 17 00:00:00 2001 From: Gasper Tomazic Date: Tue, 10 Sep 2024 10:50:48 +0200 Subject: [PATCH 4/5] Remove dictionary_decompress.c --- ext/zstdruby/dictionary_decompress.c | 151 --------------------------- 1 file changed, 151 deletions(-) delete mode 100644 ext/zstdruby/dictionary_decompress.c diff --git a/ext/zstdruby/dictionary_decompress.c b/ext/zstdruby/dictionary_decompress.c deleted file mode 
100644 index c9a21ea..0000000 --- a/ext/zstdruby/dictionary_decompress.c +++ /dev/null @@ -1,151 +0,0 @@ -#include "common.h" - -struct dictionary_decompress_t { - ZSTD_DCtx* ctx; -}; - -static void -dictionary_decompress_mark(void *p) -{ - struct dictionary_decompress_t *dd = p; -} - -static void -dictionary_decompress_free(void *p) -{ - struct dictionary_decompress_t *dd = p; - - ZSTD_DCtx* ctx = dd->ctx; - if (ctx != NULL) { - ZSTD_freeDCtx(ctx); - } - - xfree(dd); -} - -static size_t -dictionary_decompress_memsize(const void *p) -{ - return sizeof(struct dictionary_decompress_t); -} - -#ifdef HAVE_RB_GC_MARK_MOVABLE -static void -dictionary_decompress_compact(void *p) -{ - struct dictionary_decompress_t *dd = p; -} -#endif - -static const rb_data_type_t dictionary_decompress_type = { - "dictionary_decompress", - { - dictionary_decompress_mark, - dictionary_decompress_free, - dictionary_decompress_memsize, -#ifdef HAVE_RB_GC_MARK_MOVABLE - dictionary_decompress_compact, -#endif - }, - 0, 0, RUBY_TYPED_FREE_IMMEDIATELY -}; - - -static VALUE -rb_dictionary_decompress_allocate(VALUE klass) -{ - struct dictionary_decompress_t* dd; - VALUE obj = TypedData_Make_Struct(klass, struct dictionary_decompress_t, &dictionary_decompress_type, dd); - dd->ctx = NULL; - return obj; -} - - -static VALUE -rb_dictionary_decompress_initialize(int argc, VALUE *argv, VALUE obj) -{ - VALUE kwargs; - rb_scan_args(argc, argv, "00:", &kwargs); - - struct dictionary_decompress_t* dd; - TypedData_Get_Struct(obj, struct dictionary_decompress_t, &dictionary_decompress_type, dd); - - ZSTD_DCtx* const ctx = ZSTD_createDCtx(); - if (ctx == NULL) { - rb_raise(rb_eRuntimeError, "%s", "ZSTD_createDCtx error"); - } - - dd->ctx = ctx; - - set_decompress_params(ctx, kwargs); - - return obj; -} - - -static VALUE decompress_buffered(ZSTD_DCtx* dctx, const char* input_data, size_t input_size) -{ - ZSTD_inBuffer input = { input_data, input_size, 0 }; - VALUE result = rb_str_new(0, 0); - - while (input.pos 
< input.size) { - ZSTD_outBuffer output = { NULL, 0, 0 }; - output.size += ZSTD_DStreamOutSize(); - VALUE output_string = rb_str_new(NULL, output.size); - output.dst = RSTRING_PTR(output_string); - - size_t ret = zstd_stream_decompress(dctx, &output, &input, false); - if (ZSTD_isError(ret)) { - ZSTD_freeDCtx(dctx); - rb_raise(rb_eRuntimeError, "%s: %s", "ZSTD_decompressStream failed", ZSTD_getErrorName(ret)); - } - rb_str_cat(result, output.dst, output.pos); - RB_GC_GUARD(output_string); - } - ZSTD_freeDCtx(dctx); - return result; -} - - -static VALUE -rb_dictionary_decompress_decompress(VALUE obj, VALUE input_value) -{ - - struct dictionary_decompress_t* dd; - TypedData_Get_Struct(obj, struct dictionary_decompress_t, &dictionary_decompress_type, dd); - - StringValue(input_value); - char* input_data = RSTRING_PTR(input_value); - size_t input_size = RSTRING_LEN(input_value); - - unsigned long long const uncompressed_size = ZSTD_getFrameContentSize(input_data, input_size); - if (uncompressed_size == ZSTD_CONTENTSIZE_ERROR) { - rb_raise(rb_eRuntimeError, "%s: %s", "not compressed by zstd", ZSTD_getErrorName(uncompressed_size)); - } - // ZSTD_decompressStream may be called multiple times when ZSTD_CONTENTSIZE_UNKNOWN, causing slowness. 
- // Therefore, we will not standardize on ZSTD_decompressStream - if (uncompressed_size == ZSTD_CONTENTSIZE_UNKNOWN) { - return decompress_buffered(dd->ctx, input_data, input_size); - } - - VALUE output = rb_str_new(NULL, uncompressed_size); - char* output_data = RSTRING_PTR(output); - - size_t const decompress_size = zstd_decompress(dd->ctx, output_data, uncompressed_size, input_data, input_size, false); - if (ZSTD_isError(decompress_size)) { - rb_raise(rb_eRuntimeError, "%s: %s", "decompress error", ZSTD_getErrorName(decompress_size)); - } - - return output; -} - - -extern VALUE rb_mZstd, cDictionaryDecompress; -void -zstd_ruby_dictionary_decompress_init(void) -{ - VALUE cDictionaryDecompress = rb_define_class_under(rb_mZstd, "DictionaryDecompress", rb_cObject); - rb_define_alloc_func(cDictionaryDecompress, rb_dictionary_decompress_allocate); - rb_define_method(cDictionaryDecompress, "initialize", rb_dictionary_decompress_initialize, -1); - rb_define_method(cDictionaryDecompress, "decompress", rb_dictionary_decompress_decompress, 1); -} From 32f67ac977ce2e39b14c3fa18557dc1781f5b502 Mon Sep 17 00:00:00 2001 From: Gasper Tomazic Date: Tue, 10 Sep 2024 13:43:03 +0200 Subject: [PATCH 5/5] Add version modifier --- lib/zstd-ruby/version.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/zstd-ruby/version.rb b/lib/zstd-ruby/version.rb index 614329e..3ee59ad 100644 --- a/lib/zstd-ruby/version.rb +++ b/lib/zstd-ruby/version.rb @@ -1,3 +1,3 @@ module Zstd - VERSION = "1.5.6.6" + VERSION = "1.5.6.6.pl" end