From 9023804337cd641f118ae860be4a61a77965903a Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Mon, 12 Jan 2026 00:34:39 +0900 Subject: [PATCH] GH-48820: [Ruby] Add support for writing null array --- ruby/red-arrow-format/Rakefile | 2 +- ruby/red-arrow-format/lib/arrow-format.rb | 2 + .../lib/arrow-format/array.rb | 62 ++++++---- .../lib/arrow-format/bitmap.rb | 4 + .../lib/arrow-format/field.rb | 26 +++++ .../lib/arrow-format/file-reader.rb | 23 ++-- .../lib/arrow-format/file-writer.rb | 56 +++++++++ .../lib/arrow-format/flat-buffers.rb | 53 +++++++++ .../org/apache/arrow/flatbuf/binary.rb | 6 +- .../org/apache/arrow/flatbuf/binary_view.rb | 6 +- .../org/apache/arrow/flatbuf/block.rb | 12 +- .../apache/arrow/flatbuf/body_compression.rb | 9 +- .../arrow/flatbuf/body_compression_method.rb | 4 +- .../org/apache/arrow/flatbuf/bool.rb | 6 +- .../org/apache/arrow/flatbuf/buffer.rb | 9 +- .../apache/arrow/flatbuf/compression_type.rb | 2 +- .../org/apache/arrow/flatbuf/date.rb | 8 +- .../org/apache/arrow/flatbuf/date_unit.rb | 2 +- .../org/apache/arrow/flatbuf/decimal.rb | 10 +- .../apache/arrow/flatbuf/dictionary_batch.rb | 10 +- .../arrow/flatbuf/dictionary_encoding.rb | 13 ++- .../apache/arrow/flatbuf/dictionary_kind.rb | 2 +- .../org/apache/arrow/flatbuf/duration.rb | 8 +- .../org/apache/arrow/flatbuf/endianness.rb | 2 +- .../org/apache/arrow/flatbuf/feature.rb | 2 +- .../org/apache/arrow/flatbuf/field.rb | 14 ++- .../org/apache/arrow/flatbuf/field_node.rb | 9 +- .../apache/arrow/flatbuf/fixed_size_binary.rb | 8 +- .../apache/arrow/flatbuf/fixed_size_list.rb | 8 +- .../apache/arrow/flatbuf/floating_point.rb | 8 +- .../org/apache/arrow/flatbuf/footer.rb | 24 +++- .../org/apache/arrow/flatbuf/int.rb | 9 +- .../org/apache/arrow/flatbuf/interval.rb | 8 +- .../org/apache/arrow/flatbuf/interval_unit.rb | 2 +- .../org/apache/arrow/flatbuf/key_value.rb | 9 +- .../org/apache/arrow/flatbuf/large_binary.rb | 6 +- .../org/apache/arrow/flatbuf/large_list.rb | 6 +- 
.../apache/arrow/flatbuf/large_list_view.rb | 6 +- .../org/apache/arrow/flatbuf/large_utf8.rb | 6 +- .../org/apache/arrow/flatbuf/list.rb | 6 +- .../org/apache/arrow/flatbuf/list_view.rb | 6 +- .../org/apache/arrow/flatbuf/map.rb | 10 +- .../org/apache/arrow/flatbuf/message.rb | 24 +++- .../apache/arrow/flatbuf/message_header.rb | 2 +- .../apache/arrow/flatbuf/metadata_version.rb | 2 +- .../org/apache/arrow/flatbuf/null.rb | 6 +- .../org/apache/arrow/flatbuf/precision.rb | 2 +- .../org/apache/arrow/flatbuf/record_batch.rb | 14 ++- .../apache/arrow/flatbuf/run_end_encoded.rb | 6 +- .../org/apache/arrow/flatbuf/schema.rb | 11 +- .../flatbuf/sparse_matrix_compressed_axis.rb | 2 +- .../arrow/flatbuf/sparse_matrix_index_csx.rb | 14 ++- .../org/apache/arrow/flatbuf/sparse_tensor.rb | 14 ++- .../arrow/flatbuf/sparse_tensor_index.rb | 2 +- .../arrow/flatbuf/sparse_tensor_index_coo.rb | 17 ++- .../arrow/flatbuf/sparse_tensor_index_csf.rb | 18 ++- .../org/apache/arrow/flatbuf/struct_.rb | 6 +- .../org/apache/arrow/flatbuf/tensor.rb | 12 +- .../org/apache/arrow/flatbuf/tensor_dim.rb | 9 +- .../org/apache/arrow/flatbuf/time.rb | 9 +- .../org/apache/arrow/flatbuf/time_unit.rb | 2 +- .../org/apache/arrow/flatbuf/timestamp.rb | 9 +- .../org/apache/arrow/flatbuf/type.rb | 2 +- .../org/apache/arrow/flatbuf/union.rb | 9 +- .../org/apache/arrow/flatbuf/union_mode.rb | 2 +- .../org/apache/arrow/flatbuf/utf8.rb | 6 +- .../org/apache/arrow/flatbuf/utf8view.rb | 6 +- .../lib/arrow-format/readable.rb | 98 ++++++---------- .../lib/arrow-format/record-batch.rb | 54 +++++++++ .../lib/arrow-format/schema.rb | 9 ++ .../lib/arrow-format/streaming-pull-reader.rb | 10 +- .../lib/arrow-format/streaming-writer.rb | 97 ++++++++++++++++ .../red-arrow-format/lib/arrow-format/type.rb | 14 ++- .../red-arrow-format/red-arrow-format.gemspec | 2 +- ruby/red-arrow-format/test/test-reader.rb | 2 +- ruby/red-arrow-format/test/test-writer.rb | 108 ++++++++++++++++++ ruby/red-arrow/lib/arrow/column.rb | 4 + 77 
files changed, 909 insertions(+), 179 deletions(-) create mode 100644 ruby/red-arrow-format/lib/arrow-format/file-writer.rb create mode 100644 ruby/red-arrow-format/lib/arrow-format/flat-buffers.rb create mode 100644 ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb create mode 100644 ruby/red-arrow-format/test/test-writer.rb diff --git a/ruby/red-arrow-format/Rakefile b/ruby/red-arrow-format/Rakefile index f56c4c79352..f50f18f3b82 100644 --- a/ruby/red-arrow-format/Rakefile +++ b/ruby/red-arrow-format/Rakefile @@ -39,7 +39,7 @@ task :test do end end -namespace :flatbuffers do +namespace :flat_buffers do desc "Generate FlatBuffers code" task :generate do Dir.mktmpdir do |tmp_dir| diff --git a/ruby/red-arrow-format/lib/arrow-format.rb b/ruby/red-arrow-format/lib/arrow-format.rb index 2c8ecbf55c7..d1cb1fa99d7 100644 --- a/ruby/red-arrow-format/lib/arrow-format.rb +++ b/ruby/red-arrow-format/lib/arrow-format.rb @@ -16,5 +16,7 @@ # under the License. require_relative "arrow-format/file-reader" +require_relative "arrow-format/file-writer" require_relative "arrow-format/streaming-reader" +require_relative "arrow-format/streaming-writer" require_relative "arrow-format/version" diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb index 0c27e24bc6a..077da766360 100644 --- a/ruby/red-arrow-format/lib/arrow-format/array.rb +++ b/ruby/red-arrow-format/lib/arrow-format/array.rb @@ -31,18 +31,32 @@ def initialize(type, size, validity_buffer) def valid?(i) return true if @validity_buffer.nil? - (@validity_buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0 + validity_bitmap[i] == 1 end def null?(i) not valid?(i) end + def n_nulls + if @validity_buffer.nil? + 0 + else + # TODO: popcount (nulls are the 0 bits) + validity_bitmap.count do |bit| + bit == 0 + end + end + end + private + def validity_bitmap + @validity_bitmap ||= Bitmap.new(@validity_buffer, @size) + end + def apply_validity(array) return array if @validity_buffer.nil? 
- @validity_bitmap ||= Bitmap.new(@validity_buffer, @size) - @validity_bitmap.each_with_index do |bit, i| + validity_bitmap.each_with_index do |bit, i| array[i] = nil if bit.zero? end array @@ -54,17 +68,30 @@ def initialize(type, size) super(type, size, nil) end + def each_buffer + return to_enum(__method__) unless block_given? + end + def to_a [nil] * @size end end - class BooleanArray < Array + class PrimitiveArray < Array def initialize(type, size, validity_buffer, values_buffer) super(type, size, validity_buffer) @values_buffer = values_buffer end + def each_buffer + return to_enum(__method__) unless block_given? + + yield(@validity_buffer) + yield(@values_buffer) + end + end + + class BooleanArray < PrimitiveArray def to_a @values_bitmap ||= Bitmap.new(@values_buffer, @size) values = @values_bitmap.each.collect do |bit| @@ -74,12 +101,7 @@ def to_a end end - class IntArray < Array - def initialize(type, size, validity_buffer, values_buffer) - super(type, size, validity_buffer) - @values_buffer = values_buffer - end - + class IntArray < PrimitiveArray def to_a apply_validity(@values_buffer.values(@type.buffer_type, 0, @size)) end @@ -109,11 +131,7 @@ class Int64Array < IntArray class UInt64Array < IntArray end - class FloatingPointArray < Array - def initialize(type, size, validity_buffer, values_buffer) - super(type, size, validity_buffer) - @values_buffer = values_buffer - end + class FloatingPointArray < PrimitiveArray end class Float32Array < FloatingPointArray @@ -128,11 +146,7 @@ def to_a end end - class TemporalArray < Array - def initialize(type, size, validity_buffer, values_buffer) - super(type, size, validity_buffer) - @values_buffer = values_buffer - end + class TemporalArray < PrimitiveArray end class DateArray < TemporalArray @@ -217,6 +231,14 @@ def initialize(type, size, validity_buffer, offsets_buffer, values_buffer) @values_buffer = values_buffer end + def each_buffer + return to_enum(__method__) unless block_given? 
+ + yield(@validity_buffer) + yield(@offsets_buffer) + yield(@values_buffer) + end + def to_a values = @offsets_buffer. each(buffer_type, 0, @size + 1). diff --git a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb index 6f5b7ea8017..5cff7e63d2a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/bitmap.rb +++ b/ruby/red-arrow-format/lib/arrow-format/bitmap.rb @@ -23,6 +23,10 @@ def initialize(buffer, n_values) @n_values = n_values end + def [](i) + (@buffer.get_value(:U8, i / 8) >> (i % 8)) & 1 + end + def each return to_enum(__method__) unless block_given? diff --git a/ruby/red-arrow-format/lib/arrow-format/field.rb b/ruby/red-arrow-format/lib/arrow-format/field.rb index 090113cfe6b..f2c9181c3dd 100644 --- a/ruby/red-arrow-format/lib/arrow-format/field.rb +++ b/ruby/red-arrow-format/lib/arrow-format/field.rb @@ -29,5 +29,31 @@ def initialize(name, type, nullable, dictionary_id) def nullable? @nullable end + + def to_flat_buffers + fb_field = FB::Field::Data.new + fb_field.name = @name + fb_field.nullable = @nullable + if @type.is_a?(DictionaryType) + fb_field.type = @type.value_type.to_flat_buffers + dictionary_encoding = FB::DictionaryEncoding::Data.new + dictionary_encoding.id = @dictionary_id + int = FB::Int::Data.new + int.bit_width = @type.index_type.bit_width + int.signed = @type.index_type.signed? + dictionary_encoding.index_type = int + dictionary_encoding.ordered = @type.ordered? 
+ dictionary_encoding.dictionary_kind = + FB::DictionaryKind::DENSE_ARRAY + fb_field.dictionary = dictionary_encoding + else + fb_field.type = @type.to_flat_buffers + end + if @type.respond_to?(:children) + fb_field.children = @type.children.collect(&:to_flat_buffers) + end + # fb_field.custom_metadata = @custom_metadata + fb_field + end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb index 545638ca902..6218fbcf143 100644 --- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb +++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb @@ -17,9 +17,6 @@ require_relative "streaming-reader" -require_relative "org/apache/arrow/flatbuf/block" -require_relative "org/apache/arrow/flatbuf/footer" - module ArrowFormat class FileReader include Enumerable @@ -59,9 +56,9 @@ def n_record_batches end def read(i) - fb_message, body = read_block(@record_batch_blocks[i]) + fb_message, body = read_block(@record_batch_blocks[i], :record_batch, i) fb_header = fb_message.header - unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::RecordBatch) + unless fb_header.is_a?(FB::RecordBatch) raise FileReadError.new(@buffer, "Not a record batch message: #{i}: " + fb_header.class.name) @@ -104,10 +101,10 @@ def read_footer footer_size = @buffer.get_value(FOOTER_SIZE_FORMAT, footer_size_offset) footer_data = @buffer.slice(footer_size_offset - footer_size, footer_size) - Org::Apache::Arrow::Flatbuf::Footer.new(footer_data) + FB::Footer.new(footer_data) end - def read_block(block) + def read_block(block, type, i) offset = block.offset # If we can report property error information, we can use @@ -127,7 +124,7 @@ def read_block(block) continuation = @buffer.slice(offset, continuation_size) unless continuation == CONTINUATION_BUFFER raise FileReadError.new(@buffer, - "Invalid continuation: #{i}: " + + "Invalid continuation: #{type}: #{i}: " + continuation.inspect) end offset += continuation_size @@ -141,14 +138,14 @@ def 
read_block(block) metadata_length_size unless metadata_length == expected_metadata_length raise FileReadError.new(@buffer, - "Invalid metadata length #{i}: " + + "Invalid metadata length: #{type}: #{i}: " + "expected:#{expected_metadata_length} " + "actual:#{metadata_length}") end offset += metadata_length_size metadata = @buffer.slice(offset, metadata_length) - fb_message = Org::Apache::Arrow::Flatbuf::Message.new(metadata) + fb_message = FB::Message.new(metadata) offset += metadata_length body = @buffer.slice(offset, block.body_length) @@ -167,10 +164,10 @@ def read_dictionaries end dictionaries = {} - dictionary_blocks.each do |block| - fb_message, body = read_block(block) + dictionary_blocks.each_with_index do |block, i| + fb_message, body = read_block(block, :dictionary_block, i) fb_header = fb_message.header - unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::DictionaryBatch) + unless fb_header.is_a?(FB::DictionaryBatch) raise FileReadError.new(@buffer, "Not a dictionary batch message: " + fb_header.inspect) diff --git a/ruby/red-arrow-format/lib/arrow-format/file-writer.rb b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb new file mode 100644 index 00000000000..b33de02fe02 --- /dev/null +++ b/ruby/red-arrow-format/lib/arrow-format/file-writer.rb @@ -0,0 +1,56 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +require_relative "streaming-writer" + +module ArrowFormat + class FileWriter < StreamingWriter + MAGIC = "ARROW1".b + MAGIC_PADDING = "\x00\x00" + + def start(schema) + @fb_schema = schema.to_flat_buffers + write_data(MAGIC) + write_data(MAGIC_PADDING) + super + end + + def finish + super + write_footer + write_data(MAGIC) + @output + end + + private + def build_footer + fb_footer = FB::Footer::Data.new + fb_footer.version = FB::MetadataVersion::V5 + fb_footer.schema = @fb_schema + # fb_footer.dictionaries = ... # TODO + fb_footer.record_batches = @fb_record_batch_blocks + # fb_footer.custom_metadata = ... # TODO + FB::Footer.serialize(fb_footer) + end + + def write_footer + footer = build_footer + write_data(footer) + write_data([footer.bytesize].pack("l<")) + end + end +end diff --git a/ruby/red-arrow-format/lib/arrow-format/flat-buffers.rb b/ruby/red-arrow-format/lib/arrow-format/flat-buffers.rb new file mode 100644 index 00000000000..8b33d8a18e2 --- /dev/null +++ b/ruby/red-arrow-format/lib/arrow-format/flat-buffers.rb @@ -0,0 +1,53 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "org/apache/arrow/flatbuf/binary" +require_relative "org/apache/arrow/flatbuf/block" +require_relative "org/apache/arrow/flatbuf/bool" +require_relative "org/apache/arrow/flatbuf/date" +require_relative "org/apache/arrow/flatbuf/date_unit" +require_relative "org/apache/arrow/flatbuf/decimal" +require_relative "org/apache/arrow/flatbuf/dictionary_encoding" +require_relative "org/apache/arrow/flatbuf/dictionary_batch" +require_relative "org/apache/arrow/flatbuf/duration" +require_relative "org/apache/arrow/flatbuf/fixed_size_binary" +require_relative "org/apache/arrow/flatbuf/floating_point" +require_relative "org/apache/arrow/flatbuf/footer" +require_relative "org/apache/arrow/flatbuf/int" +require_relative "org/apache/arrow/flatbuf/interval" +require_relative "org/apache/arrow/flatbuf/interval_unit" +require_relative "org/apache/arrow/flatbuf/large_binary" +require_relative "org/apache/arrow/flatbuf/large_list" +require_relative "org/apache/arrow/flatbuf/large_utf8" +require_relative "org/apache/arrow/flatbuf/list" +require_relative "org/apache/arrow/flatbuf/map" +require_relative "org/apache/arrow/flatbuf/message" +require_relative "org/apache/arrow/flatbuf/null" +require_relative "org/apache/arrow/flatbuf/precision" +require_relative "org/apache/arrow/flatbuf/record_batch" +require_relative "org/apache/arrow/flatbuf/schema" +require_relative "org/apache/arrow/flatbuf/struct_" +require_relative "org/apache/arrow/flatbuf/time" +require_relative "org/apache/arrow/flatbuf/time_unit" +require_relative "org/apache/arrow/flatbuf/timestamp" +require_relative "org/apache/arrow/flatbuf/union" +require_relative "org/apache/arrow/flatbuf/union_mode" +require_relative "org/apache/arrow/flatbuf/utf8" + +module ArrowFormat + FB = Org::Apache::Arrow::Flatbuf +end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb index b1c8e571784..16e510cddc6 
100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,10 @@ module Arrow module Flatbuf # Opaque binary data class Binary < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb index 3a8fb8cc74c..a214c94c837 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/binary_view.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -19,6 +19,10 @@ module Flatbuf # Since it uses a variable number of data buffers, each Field with this type # must have a corresponding entry in `variadicBufferCounts`. class BinaryView < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/block.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/block.rb index 5f6894039d5..b93f86ca3f6 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/block.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/block.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //File.fbs # Rooting type: org.apache.arrow.flatbuf.Footer (//File.fbs) @@ -12,6 +12,14 @@ module Apache module Arrow module Flatbuf class Block < ::FlatBuffers::Struct + FIELDS = { + offset: ::FlatBuffers::Field.new(:offset, 0, 0, :long, 0), + meta_data_length: ::FlatBuffers::Field.new(:meta_data_length, 1, 8, :int, 4), + body_length: ::FlatBuffers::Field.new(:body_length, 2, 16, :long, 0), + } + + Data = define_data_class + # Length of the data (this is aligned so there can be a gap between this and # the metadata). def body_length @@ -25,7 +33,7 @@ def meta_data_length @view.unpack_int(field_offset) end - # Index to the start of the RecordBlock (note this is past the Message header) + # Index to the start of the RecordBatch (note this is past the Message header) def offset field_offset = 0 @view.unpack_long(field_offset) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb index dccfc97a38d..a53fa98ed38 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -17,6 +17,13 @@ module Flatbuf # bodies. Intended for use with RecordBatch but could be used for other # message types class BodyCompression < ::FlatBuffers::Table + FIELDS = { + codec: ::FlatBuffers::Field.new(:codec, 0, 4, :byte, 0), + method: ::FlatBuffers::Field.new(:method, 1, 6, :byte, 0), + } + + Data = define_data_class + # Compressor library. # For LZ4_FRAME, each compressed buffer must consist of a single frame. 
def codec diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb index fbbada04f4c..34db211da29 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/body_compression_method.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -22,6 +22,8 @@ class BodyCompressionMethod < ::FlatBuffers::Enum # uncompressed length may be set to -1 to indicate that the data that # follows is not compressed, which can be useful for cases where # compression does not yield appreciable savings. + # Also, empty buffers can optionally be written out as 0-byte compressed + # buffers, thereby omitting the 8-bytes length header. BUFFER = register("BUFFER", 0) end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb index 5e0f6cd38a8..a90e9a6a78d 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/bool.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -12,6 +12,10 @@ module Apache module Arrow module Flatbuf class Bool < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb index 76bcf139882..be4ac43b764 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/buffer.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,13 @@ module Flatbuf # ---------------------------------------------------------------------- # A Buffer represents a single contiguous memory segment class Buffer < ::FlatBuffers::Struct + FIELDS = { + offset: ::FlatBuffers::Field.new(:offset, 0, 0, :long, 0), + length: ::FlatBuffers::Field.new(:length, 1, 8, :long, 0), + } + + Data = define_data_class + # The absolute length (in bytes) of the memory buffer. The memory is found # from offset (inclusive) to offset + length (non-inclusive). When building # messages using the encapsulated IPC message, padding bytes may be written diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb index 4e503ba410e..574330eb110 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/compression_type.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date.rb index 41f07de906b..070023ae173 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -19,6 +19,12 @@ module Flatbuf # leap seconds), where the values are evenly divisible by 86400000 # * Days (32 bits) since the UNIX epoch class Date < ::FlatBuffers::Table + FIELDS = { + unit: ::FlatBuffers::Field.new(:unit, 0, 4, :short, 0), + } + + Data = define_data_class + def unit field_offset = @view.unpack_virtual_offset(4) if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb index d3d1299a090..395fde219bf 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/date_unit.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb index 1bdb27e2a6a..ab7ad2b0ed1 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/decimal.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -16,6 +16,14 @@ module Flatbuf # 128-bit (16-byte) and 256-bit (32-byte) integers are used. # The representation uses the endianness indicated in the Schema. class Decimal < ::FlatBuffers::Table + FIELDS = { + precision: ::FlatBuffers::Field.new(:precision, 0, 4, :int, 0), + scale: ::FlatBuffers::Field.new(:scale, 1, 6, :int, 0), + bit_width: ::FlatBuffers::Field.new(:bit_width, 2, 8, :int, 0), + } + + Data = define_data_class + # Number of bits per value. The accepted widths are 32, 64, 128 and 256. # We use bitWidth for consistency with Int::bitWidth. def bit_width diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb index 247f33449a0..c83b793250f 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_batch.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -19,6 +19,14 @@ module Flatbuf # may be spread across multiple dictionary batches by using the isDelta # flag class DictionaryBatch < ::FlatBuffers::Table + FIELDS = { + id: ::FlatBuffers::Field.new(:id, 0, 4, :long, 0), + data: ::FlatBuffers::Field.new(:data, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::RecordBatch", 0), + delta?: ::FlatBuffers::Field.new(:delta?, 2, 8, :bool, 0), + } + + Data = define_data_class + def data field_offset = @view.unpack_virtual_offset(6) return nil if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb index 60ad3f4a8e3..560bdef64ff 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_encoding.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,15 @@ module Apache module Arrow module Flatbuf class DictionaryEncoding < ::FlatBuffers::Table + FIELDS = { + id: ::FlatBuffers::Field.new(:id, 0, 4, :long, 0), + index_type: ::FlatBuffers::Field.new(:index_type, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + ordered?: ::FlatBuffers::Field.new(:ordered?, 2, 8, :bool, 0), + dictionary_kind: ::FlatBuffers::Field.new(:dictionary_kind, 3, 10, :short, 0), + } + + Data = define_data_class + def dictionary_kind field_offset = @view.unpack_virtual_offset(10) if field_offset.zero? 
@@ -47,7 +56,7 @@ def index_type end # By default, dictionaries are not ordered, or the order does not have - # semantic meaning. In some statistical, applications, dictionary-encoding + # semantic meaning. In some statistical applications, dictionary-encoding # is used to represent ordered categorical data, and we provide a way to # preserve that metadata here def ordered? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb index e7634ecf324..7504830838f 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/dictionary_kind.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb index bf91a2ab0a0..5c1d857ab38 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/duration.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,12 @@ module Apache module Arrow module Flatbuf class Duration < ::FlatBuffers::Table + FIELDS = { + unit: ::FlatBuffers::Field.new(:unit, 0, 4, :short, 0), + } + + Data = define_data_class + def unit field_offset = @view.unpack_virtual_offset(4) if field_offset.zero? 
diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb index adfe3555f73..fb8af9adf6d 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/endianness.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb index 55da686715e..e6343591b65 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/feature.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field.rb index c748e949483..9a75ae9c48e 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -18,6 +18,18 @@ module Flatbuf # A field represents a named column in a record / row batch or child of a # nested type. 
class Field < ::FlatBuffers::Table + FIELDS = { + name: ::FlatBuffers::Field.new(:name, 0, 4, :string, 0), + nullable?: ::FlatBuffers::Field.new(:nullable?, 1, 6, :bool, 0), + type_type: ::FlatBuffers::Field.new(:type_type, 2, 8, :utype, 0), + type: ::FlatBuffers::Field.new(:type, 3, 10, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Type", 0), + dictionary: ::FlatBuffers::Field.new(:dictionary, 4, 12, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::DictionaryEncoding", 0), + children: ::FlatBuffers::Field.new(:children, 5, 14, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Field"], 0), + custom_metadata: ::FlatBuffers::Field.new(:custom_metadata, 6, 16, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::KeyValue"], 0), + } + + Data = define_data_class + # children apply only to nested data types like Struct, List and Union. For # primitive types children will have length 0. def children diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb index 83eeb6719c2..6cfd3dfc0b2 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/field_node.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -21,6 +21,13 @@ module Flatbuf # would have {length: 5, null_count: 2} for its List node, and {length: 6, # null_count: 0} for its Int16 node, as separate FieldNode structs class FieldNode < ::FlatBuffers::Struct + FIELDS = { + length: ::FlatBuffers::Field.new(:length, 0, 0, :long, 0), + null_count: ::FlatBuffers::Field.new(:null_count, 1, 8, :long, 0), + } + + Data = define_data_class + # The number of value slots in the Arrow array at this level of a nested # tree def length diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb index 4659403a928..9e41895188f 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_binary.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -12,6 +12,12 @@ module Apache module Arrow module Flatbuf class FixedSizeBinary < ::FlatBuffers::Table + FIELDS = { + byte_width: ::FlatBuffers::Field.new(:byte_width, 0, 4, :int, 0), + } + + Data = define_data_class + # Number of bytes per value def byte_width field_offset = @view.unpack_virtual_offset(4) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb index 6191a8425c5..42c3d1073ad 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/fixed_size_list.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -12,6 +12,12 @@ module Apache module Arrow module Flatbuf class FixedSizeList < ::FlatBuffers::Table + FIELDS = { + list_size: ::FlatBuffers::Field.new(:list_size, 0, 4, :int, 0), + } + + Data = define_data_class + # Number of list items per value def list_size field_offset = @view.unpack_virtual_offset(4) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb index 7072ce42697..5062f546335 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/floating_point.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,12 @@ module Apache module Arrow module Flatbuf class FloatingPoint < ::FlatBuffers::Table + FIELDS = { + precision: ::FlatBuffers::Field.new(:precision, 0, 4, :short, 0), + } + + Data = define_data_class + def precision field_offset = @view.unpack_virtual_offset(4) if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb index 6ef0c100bea..f4b36f8090f 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/footer.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //File.fbs # Rooting type: org.apache.arrow.flatbuf.Footer (//File.fbs) @@ -18,7 +18,27 @@ module Flatbuf # ---------------------------------------------------------------------- # Arrow File metadata # - class Footer < ::FlatBuffers::Table + class Footer < ::FlatBuffers::RootTable + class << self + def file_identifier + "" + end + + def file_extension + "" + end + end + + FIELDS = { + version: ::FlatBuffers::Field.new(:version, 0, 4, :short, 0), + schema: ::FlatBuffers::Field.new(:schema, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Schema", 0), + dictionaries: ::FlatBuffers::Field.new(:dictionaries, 2, 8, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Block"], 0), + record_batches: ::FlatBuffers::Field.new(:record_batches, 3, 10, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Block"], 0), + custom_metadata: ::FlatBuffers::Field.new(:custom_metadata, 4, 12, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::KeyValue"], 0), + } + + Data = define_data_class + # User-defined metadata def custom_metadata 
field_offset = @view.unpack_virtual_offset(12) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/int.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/int.rb index 2b932aac3bb..8d164fe227d 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/int.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/int.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -12,6 +12,13 @@ module Apache module Arrow module Flatbuf class Int < ::FlatBuffers::Table + FIELDS = { + bit_width: ::FlatBuffers::Field.new(:bit_width, 0, 4, :int, 0), + signed?: ::FlatBuffers::Field.new(:signed?, 1, 6, :bool, 0), + } + + Data = define_data_class + def bit_width field_offset = @view.unpack_virtual_offset(4) return 0 if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb index 4ec4b13eed3..62dd5c78a2a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,12 @@ module Apache module Arrow module Flatbuf class Interval < ::FlatBuffers::Table + FIELDS = { + unit: ::FlatBuffers::Field.new(:unit, 0, 4, :short, 0), + } + + Data = define_data_class + def unit field_offset = @view.unpack_virtual_offset(4) if field_offset.zero? 
diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb index f4866d52872..e1ce82ef5f6 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/interval_unit.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb index 0faed748e2d..f307eb0a69a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/key_value.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,13 @@ module Flatbuf # user defined key value pairs to add custom metadata to arrow # key namespacing is the responsibility of the user class KeyValue < ::FlatBuffers::Table + FIELDS = { + key: ::FlatBuffers::Field.new(:key, 0, 4, :string, 0), + value: ::FlatBuffers::Field.new(:value, 1, 6, :string, 0), + } + + Data = define_data_class + def key field_offset = @view.unpack_virtual_offset(4) return nil if field_offset.zero? 
diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb index ef9bfe050ea..27739e238ef 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_binary.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,10 @@ module Flatbuf # Same as Binary, but with 64-bit offsets, allowing to represent # extremely large data values. class LargeBinary < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb index e64d3b25a75..1f9bbc44fa3 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,10 @@ module Flatbuf # Same as List, but with 64-bit offsets, allowing to represent # extremely large data values. 
class LargeList < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb index 8ed57eaef5c..c10acec3f7d 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_list_view.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,10 @@ module Flatbuf # Same as ListView, but with 64-bit offsets and sizes, allowing to represent # extremely large data values. class LargeListView < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb index 3d147edf209..5b5177e57cd 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/large_utf8.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,6 +14,10 @@ module Flatbuf # Same as Utf8, but with 64-bit offsets, allowing to represent # extremely large data values. 
class LargeUtf8 < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list.rb index 1155706222f..7ffba6001ca 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -12,6 +12,10 @@ module Apache module Arrow module Flatbuf class List < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb index 057f59f6877..6ea6048c184 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/list_view.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,10 @@ module Flatbuf # sizes allowing for writes in any order and sharing of child values among # list values. 
class ListView < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/map.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/map.rb index 366833e52f6..fbc1c0b1f2e 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/map.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/map.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -21,7 +21,7 @@ module Flatbuf # may be set in the metadata for this field. # # In a field with Map type, the field has a child Struct field, which then - # has two children: key type and the second the value type. The names of the + # has two children: the key type and the value type. The names of the # child fields may be respectively "entries", "key", and "value", but this is # not enforced. # @@ -37,6 +37,12 @@ module Flatbuf # for Map can make Map an alias for List. The "layout" attribute for the Map # field must have the same contents as a List. class Map < ::FlatBuffers::Table + FIELDS = { + keys_sorted?: ::FlatBuffers::Field.new(:keys_sorted?, 0, 4, :bool, 0), + } + + Data = define_data_class + # Set to true if the keys within each value are sorted def keys_sorted? field_offset = @view.unpack_virtual_offset(4) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message.rb index b4d1d76689d..e2392e00937 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -14,7 +14,27 @@ module Org module Apache module Arrow module Flatbuf - class Message < ::FlatBuffers::Table + class Message < ::FlatBuffers::RootTable + class << self + def file_identifier + "" + end + + def file_extension + "" + end + end + + FIELDS = { + version: ::FlatBuffers::Field.new(:version, 0, 4, :short, 0), + header_type: ::FlatBuffers::Field.new(:header_type, 1, 6, :utype, 0), + header: ::FlatBuffers::Field.new(:header, 2, 8, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::MessageHeader", 0), + body_length: ::FlatBuffers::Field.new(:body_length, 3, 10, :long, 0), + custom_metadata: ::FlatBuffers::Field.new(:custom_metadata, 4, 12, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::KeyValue"], 0), + } + + Data = define_data_class + def body_length field_offset = @view.unpack_virtual_offset(10) return 0 if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb index 13e3f9b1f10..fe2e272c207 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/message_header.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb index d144c82d628..e96dee69abe 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/metadata_version.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/null.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/null.rb index 2bfab0246de..b6861f7576a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/null.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/null.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,10 @@ module Arrow module Flatbuf # These are stored in the flatbuffer in the Type union below class Null < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb index 9196ffcd423..ef80f70f919 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/precision.rb @@ -1,6 +1,6 @@ # Automatically generated. 
Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb index 80b70454a39..d9b7e4e62c0 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/record_batch.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Message.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -18,6 +18,16 @@ module Flatbuf # batch. Some systems call this a "row batch" internally and others a "record # batch". class RecordBatch < ::FlatBuffers::Table + FIELDS = { + length: ::FlatBuffers::Field.new(:length, 0, 4, :long, 0), + nodes: ::FlatBuffers::Field.new(:nodes, 1, 6, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::FieldNode"], 0), + buffers: ::FlatBuffers::Field.new(:buffers, 2, 8, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer"], 0), + compression: ::FlatBuffers::Field.new(:compression, 3, 10, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::BodyCompression", 0), + variadic_buffer_counts: ::FlatBuffers::Field.new(:variadic_buffer_counts, 4, 12, [:long], 0), + } + + Data = define_data_class + # Buffers correspond to the pre-ordered flattened buffer tree # # The number of buffers appended to this list depends on the schema. For @@ -64,7 +74,7 @@ def nodes # Some types such as Utf8View are represented using a variable number of buffers. 
# For each such Field in the pre-ordered flattened logical schema, there will be - # an entry in variadicBufferCounts to indicate the number of number of variadic + # an entry in variadicBufferCounts to indicate the number of variadic # buffers which belong to that Field in the current RecordBatch. # # For example, the schema diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb index 7141da0f31e..0f4e80e5ef9 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/run_end_encoded.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -17,6 +17,10 @@ module Flatbuf # each corresponding index in the values child array ends. # Like list/struct types, the value array can be of any type. class RunEndEncoded < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb index 5997c409a97..77b171c1159 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/schema.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -17,6 +17,15 @@ module Flatbuf # ---------------------------------------------------------------------- # A Schema describes the columns in a row batch class Schema < ::FlatBuffers::Table + FIELDS = { + endianness: ::FlatBuffers::Field.new(:endianness, 0, 4, :short, 0), + fields: ::FlatBuffers::Field.new(:fields, 1, 6, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Field"], 0), + custom_metadata: ::FlatBuffers::Field.new(:custom_metadata, 2, 8, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::KeyValue"], 0), + features: ::FlatBuffers::Field.new(:features, 3, 10, [:long], 0), + } + + Data = define_data_class + def custom_metadata field_offset = @view.unpack_virtual_offset(8) return nil if field_offset.zero? diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb index ad8b63e8952..7684bcf79fd 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_compressed_axis.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb index f1ab5be5f8e..d16a8b3562a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_matrix_index_csx.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -16,6 +16,16 @@ module Arrow module Flatbuf # Compressed Sparse format, that is matrix-specific. class SparseMatrixIndexCSX < ::FlatBuffers::Table + FIELDS = { + compressed_axis: ::FlatBuffers::Field.new(:compressed_axis, 0, 4, :short, 0), + indptr_type: ::FlatBuffers::Field.new(:indptr_type, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + indptr_buffer: ::FlatBuffers::Field.new(:indptr_buffer, 2, 8, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer", 0), + indices_type: ::FlatBuffers::Field.new(:indices_type, 3, 10, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + indices_buffer: ::FlatBuffers::Field.new(:indices_buffer, 4, 12, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer", 0), + } + + Data = define_data_class + # Which axis, row or column, is compressed def compressed_axis field_offset = @view.unpack_virtual_offset(4) @@ -31,7 +41,7 @@ def compressed_axis # contains the column indices of the corresponding non-zero values. # The type of index value is long. # - # For example, the indices of the above X is: + # For example, the indices of the above X are: # ```text # indices(X) = [1, 2, 2, 1, 3, 0, 2, 3, 1]. 
# ``` diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb index ade483a4876..4e8fb9135d5 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -16,6 +16,18 @@ module Apache module Arrow module Flatbuf class SparseTensor < ::FlatBuffers::Table + FIELDS = { + type_type: ::FlatBuffers::Field.new(:type_type, 0, 4, :utype, 0), + type: ::FlatBuffers::Field.new(:type, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Type", 0), + shape: ::FlatBuffers::Field.new(:shape, 2, 8, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::TensorDim"], 0), + non_zero_length: ::FlatBuffers::Field.new(:non_zero_length, 3, 10, :long, 0), + sparse_index_type: ::FlatBuffers::Field.new(:sparse_index_type, 4, 12, :utype, 0), + sparse_index: ::FlatBuffers::Field.new(:sparse_index, 5, 14, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::SparseTensorIndex", 0), + data: ::FlatBuffers::Field.new(:data, 6, 16, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer", 0), + } + + Data = define_data_class + # The location and size of the tensor's data def data field_offset = @view.unpack_virtual_offset(16) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb index a857b7fe6e7..1c9e6be2242 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index.rb @@ -1,6 +1,6 @@ # 
Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb index 0807721a044..bd3e450c7e3 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_coo.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -17,11 +17,11 @@ module Flatbuf # EXPERIMENTAL: Data structures for sparse tensors # Coordinate (COO) format of sparse tensor index. # - # COO's index list are represented as a NxM matrix, + # COO's index list is represented as an NxM matrix, # where N is the number of non-zero values, # and M is the number of dimensions of a sparse tensor. # - # indicesBuffer stores the location and size of the data of this indices + # indicesBuffer stores the location and size of the data of these indices # matrix. The value type and the stride of the indices matrix is # specified in indicesType and indicesStrides fields. # @@ -42,10 +42,19 @@ module Flatbuf # [2, 2, 3, 1, 2, 0], # [0, 1, 0, 0, 3, 4]] # ``` - # When isCanonical is true, the indices is sorted in lexicographical order + # When isCanonical is true, the indices are sorted in lexicographical order # (row-major order), and it does not have duplicated entries. Otherwise, # the indices may not be sorted, or may have duplicated entries. 
class SparseTensorIndexCOO < ::FlatBuffers::Table + FIELDS = { + indices_type: ::FlatBuffers::Field.new(:indices_type, 0, 4, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + indices_strides: ::FlatBuffers::Field.new(:indices_strides, 1, 6, [:long], 0), + indices_buffer: ::FlatBuffers::Field.new(:indices_buffer, 2, 8, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer", 0), + canonical?: ::FlatBuffers::Field.new(:canonical?, 3, 10, :bool, 0), + } + + Data = define_data_class + # The location and size of the indices matrix's data def indices_buffer field_offset = @view.unpack_virtual_offset(8) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb index 5e86ade5c52..979b263cd70 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/sparse_tensor_index_csf.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //SparseTensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,16 @@ module Arrow module Flatbuf # Compressed Sparse Fiber (CSF) sparse tensor index. 
class SparseTensorIndexCSF < ::FlatBuffers::Table + FIELDS = { + indptr_type: ::FlatBuffers::Field.new(:indptr_type, 0, 4, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + indptr_buffers: ::FlatBuffers::Field.new(:indptr_buffers, 1, 6, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer"], 0), + indices_type: ::FlatBuffers::Field.new(:indices_type, 2, 8, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Int", 0), + indices_buffers: ::FlatBuffers::Field.new(:indices_buffers, 3, 10, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer"], 0), + axis_order: ::FlatBuffers::Field.new(:axis_order, 4, 12, [:int], 0), + } + + Data = define_data_class + # axisOrder stores the sequence in which dimensions were traversed to # produce the prefix tree. # For example, the axisOrder for the above X is: @@ -33,7 +43,7 @@ def axis_order # indicesBuffers stores values of nodes. # Each tensor dimension corresponds to a buffer in indicesBuffers. - # For example, the indicesBuffers for the above X is: + # For example, the indicesBuffers for the above X are: # ```text # indicesBuffer(X) = [ # [0, 1], @@ -66,7 +76,7 @@ def indices_type # and `indptrBuffers[dim][i + 1]` signify a range of nodes in # `indicesBuffers[dim + 1]` who are children of `indicesBuffers[dim][i]` node. # - # For example, the indptrBuffers for the above X is: + # For example, the indptrBuffers for the above X are: # ```text # indptrBuffer(X) = [ # [0, 2, 3], @@ -90,7 +100,7 @@ def indptr_buffers # CSF index recursively compresses each dimension of a tensor into a set # of prefix trees. Each path from a root to leaf forms one tensor # non-zero index. CSF is implemented with two arrays of buffers and one - # arrays of integers. + # array of integers. 
# # For example, let X be a 2x3x4x5 tensor and let it have the following # 8 non-zero values: diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb index 0b2808716a5..87582733c95 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/struct_.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,10 @@ module Flatbuf # (according to the physical memory layout). We used Struct_ here as # Struct is a reserved word in Flatbuffers class Struct < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb index b8097cec22d..9e1594e7e34 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Tensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,16 @@ module Apache module Arrow module Flatbuf class Tensor < ::FlatBuffers::Table + FIELDS = { + type_type: ::FlatBuffers::Field.new(:type_type, 0, 4, :utype, 0), + type: ::FlatBuffers::Field.new(:type, 1, 6, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Type", 0), + shape: ::FlatBuffers::Field.new(:shape, 2, 8, ["::ArrowFormat::Org::Apache::Arrow::Flatbuf::TensorDim"], 0), + strides: ::FlatBuffers::Field.new(:strides, 3, 10, [:long], 0), + data: ::FlatBuffers::Field.new(:data, 4, 12, "::ArrowFormat::Org::Apache::Arrow::Flatbuf::Buffer", 0), + } + + Data = define_data_class + # The location and size of the tensor's data def data field_offset = @view.unpack_virtual_offset(12) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb index 452c1dd60c4..ff6af869f88 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/tensor_dim.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Tensor.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -15,6 +15,13 @@ module Flatbuf # Data structures for dense tensors # Shape data for a single axis in a tensor class TensorDim < ::FlatBuffers::Table + FIELDS = { + size: ::FlatBuffers::Field.new(:size, 0, 4, :long, 0), + name: ::FlatBuffers::Field.new(:name, 1, 6, :string, 0), + } + + Data = define_data_class + # Name of the dimension, optional def name field_offset = @view.unpack_virtual_offset(6) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time.rb index eb5c0b27052..244b08219d0 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -27,6 +27,13 @@ module Flatbuf # measurements with leap seconds will need to be corrected when ingesting # into Arrow (for example by replacing the value 86400 with 86399). class Time < ::FlatBuffers::Table + FIELDS = { + unit: ::FlatBuffers::Field.new(:unit, 0, 4, :short, 0), + bit_width: ::FlatBuffers::Field.new(:bit_width, 1, 6, :int, 0), + } + + Data = define_data_class + def bit_width field_offset = @view.unpack_virtual_offset(6) return 32 if field_offset.zero? 
diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb index b01d95cdc32..943f0ec12e7 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/time_unit.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb index 6d898a456ac..179b018d56e 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/timestamp.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -118,6 +118,13 @@ module Flatbuf # was UTC; for example, the naive date-time "January 1st 1970, 00h00" would # be encoded as timestamp value 0. 
class Timestamp < ::FlatBuffers::Table + FIELDS = { + unit: ::FlatBuffers::Field.new(:unit, 0, 4, :short, 0), + timezone: ::FlatBuffers::Field.new(:timezone, 1, 6, :string, 0), + } + + Data = define_data_class + # The timezone is an optional string indicating the name of a timezone, # one of: # diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/type.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/type.rb index da89a89830f..44e985f98fe 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/type.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union.rb index 83b73e22097..3f361159786 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -17,6 +17,13 @@ module Flatbuf # optionally typeIds provides an indirection between the child offset and the type id # for each child `typeIds[offset]` is the id used in the type vector class Union < ::FlatBuffers::Table + FIELDS = { + mode: ::FlatBuffers::Field.new(:mode, 0, 4, :short, 0), + type_ids: ::FlatBuffers::Field.new(:type_ids, 1, 6, [:int], 0), + } + + Data = define_data_class + def mode field_offset = @view.unpack_virtual_offset(4) if field_offset.zero? 
diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb index f4d862a430e..96d30cae329 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/union_mode.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb index 916c5dd3e95..27bd61cf8ce 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. # -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -13,6 +13,10 @@ module Arrow module Flatbuf # Unicode with UTF-8 encoding class Utf8 < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb index d627cdd8f11..6a4d687579a 100644 --- a/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb +++ b/ruby/red-arrow-format/lib/arrow-format/org/apache/arrow/flatbuf/utf8view.rb @@ -1,6 +1,6 @@ # Automatically generated. Don't modify manually. 
# -# Red FlatBuffers version: 0.0.3 +# Red FlatBuffers version: 0.0.4 # Declared by: //Schema.fbs # Rooting type: org.apache.arrow.flatbuf.Message (//Message.fbs) @@ -19,6 +19,10 @@ module Flatbuf # Since it uses a variable number of data buffers, each Field with this type # must have a corresponding entry in `variadicBufferCounts`. class Utf8View < ::FlatBuffers::Table + FIELDS = { + } + + Data = define_data_class end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/readable.rb b/ruby/red-arrow-format/lib/arrow-format/readable.rb index ad6be653e06..9cf1beecbeb 100644 --- a/ruby/red-arrow-format/lib/arrow-format/readable.rb +++ b/ruby/red-arrow-format/lib/arrow-format/readable.rb @@ -17,41 +17,11 @@ require_relative "array" require_relative "field" +require_relative "flat-buffers" require_relative "record-batch" require_relative "schema" require_relative "type" -require_relative "org/apache/arrow/flatbuf/binary" -require_relative "org/apache/arrow/flatbuf/bool" -require_relative "org/apache/arrow/flatbuf/date" -require_relative "org/apache/arrow/flatbuf/date_unit" -require_relative "org/apache/arrow/flatbuf/decimal" -require_relative "org/apache/arrow/flatbuf/dictionary_encoding" -require_relative "org/apache/arrow/flatbuf/dictionary_batch" -require_relative "org/apache/arrow/flatbuf/duration" -require_relative "org/apache/arrow/flatbuf/fixed_size_binary" -require_relative "org/apache/arrow/flatbuf/floating_point" -require_relative "org/apache/arrow/flatbuf/int" -require_relative "org/apache/arrow/flatbuf/interval" -require_relative "org/apache/arrow/flatbuf/interval_unit" -require_relative "org/apache/arrow/flatbuf/large_binary" -require_relative "org/apache/arrow/flatbuf/large_list" -require_relative "org/apache/arrow/flatbuf/large_utf8" -require_relative "org/apache/arrow/flatbuf/list" -require_relative "org/apache/arrow/flatbuf/map" -require_relative "org/apache/arrow/flatbuf/message" -require_relative "org/apache/arrow/flatbuf/null" 
-require_relative "org/apache/arrow/flatbuf/precision" -require_relative "org/apache/arrow/flatbuf/record_batch" -require_relative "org/apache/arrow/flatbuf/schema" -require_relative "org/apache/arrow/flatbuf/struct_" -require_relative "org/apache/arrow/flatbuf/time" -require_relative "org/apache/arrow/flatbuf/time_unit" -require_relative "org/apache/arrow/flatbuf/timestamp" -require_relative "org/apache/arrow/flatbuf/union" -require_relative "org/apache/arrow/flatbuf/union_mode" -require_relative "org/apache/arrow/flatbuf/utf8" - module ArrowFormat module Readable private @@ -65,87 +35,87 @@ def read_schema(fb_schema) def read_field(fb_field) fb_type = fb_field.type case fb_type - when Org::Apache::Arrow::Flatbuf::Null + when FB::Null type = NullType.singleton - when Org::Apache::Arrow::Flatbuf::Bool + when FB::Bool type = BooleanType.singleton - when Org::Apache::Arrow::Flatbuf::Int + when FB::Int type = read_type_int(fb_type) - when Org::Apache::Arrow::Flatbuf::FloatingPoint + when FB::FloatingPoint case fb_type.precision - when Org::Apache::Arrow::Flatbuf::Precision::SINGLE + when FB::Precision::SINGLE type = Float32Type.singleton - when Org::Apache::Arrow::Flatbuf::Precision::DOUBLE + when FB::Precision::DOUBLE type = Float64Type.singleton end - when Org::Apache::Arrow::Flatbuf::Date + when FB::Date case fb_type.unit - when Org::Apache::Arrow::Flatbuf::DateUnit::DAY + when FB::DateUnit::DAY type = Date32Type.singleton - when Org::Apache::Arrow::Flatbuf::DateUnit::MILLISECOND + when FB::DateUnit::MILLISECOND type = Date64Type.singleton end - when Org::Apache::Arrow::Flatbuf::Time + when FB::Time case fb_type.bit_width when 32 case fb_type.unit - when Org::Apache::Arrow::Flatbuf::TimeUnit::SECOND + when FB::TimeUnit::SECOND type = Time32Type.new(:second) - when Org::Apache::Arrow::Flatbuf::TimeUnit::MILLISECOND + when FB::TimeUnit::MILLISECOND type = Time32Type.new(:millisecond) end when 64 case fb_type.unit - when 
Org::Apache::Arrow::Flatbuf::TimeUnit::MICROSECOND + when FB::TimeUnit::MICROSECOND type = Time64Type.new(:microsecond) - when Org::Apache::Arrow::Flatbuf::TimeUnit::NANOSECOND + when FB::TimeUnit::NANOSECOND type = Time64Type.new(:nanosecond) end end - when Org::Apache::Arrow::Flatbuf::Timestamp + when FB::Timestamp unit = fb_type.unit.name.downcase.to_sym type = TimestampType.new(unit, fb_type.timezone) - when Org::Apache::Arrow::Flatbuf::Interval + when FB::Interval case fb_type.unit - when Org::Apache::Arrow::Flatbuf::IntervalUnit::YEAR_MONTH + when FB::IntervalUnit::YEAR_MONTH type = YearMonthIntervalType.new - when Org::Apache::Arrow::Flatbuf::IntervalUnit::DAY_TIME + when FB::IntervalUnit::DAY_TIME type = DayTimeIntervalType.new - when Org::Apache::Arrow::Flatbuf::IntervalUnit::MONTH_DAY_NANO + when FB::IntervalUnit::MONTH_DAY_NANO type = MonthDayNanoIntervalType.new end - when Org::Apache::Arrow::Flatbuf::Duration + when FB::Duration unit = fb_type.unit.name.downcase.to_sym type = DurationType.new(unit) - when Org::Apache::Arrow::Flatbuf::List + when FB::List type = ListType.new(read_field(fb_field.children[0])) - when Org::Apache::Arrow::Flatbuf::LargeList + when FB::LargeList type = LargeListType.new(read_field(fb_field.children[0])) - when Org::Apache::Arrow::Flatbuf::Struct + when FB::Struct children = fb_field.children.collect {|child| read_field(child)} type = StructType.new(children) - when Org::Apache::Arrow::Flatbuf::Union + when FB::Union children = fb_field.children.collect {|child| read_field(child)} type_ids = fb_type.type_ids case fb_type.mode - when Org::Apache::Arrow::Flatbuf::UnionMode::DENSE + when FB::UnionMode::DENSE type = DenseUnionType.new(children, type_ids) - when Org::Apache::Arrow::Flatbuf::UnionMode::SPARSE + when FB::UnionMode::SPARSE type = SparseUnionType.new(children, type_ids) end - when Org::Apache::Arrow::Flatbuf::Map + when FB::Map type = MapType.new(read_field(fb_field.children[0])) - when 
Org::Apache::Arrow::Flatbuf::Binary + when FB::Binary type = BinaryType.singleton - when Org::Apache::Arrow::Flatbuf::LargeBinary + when FB::LargeBinary type = LargeBinaryType.singleton - when Org::Apache::Arrow::Flatbuf::Utf8 + when FB::Utf8 type = UTF8Type.singleton - when Org::Apache::Arrow::Flatbuf::LargeUtf8 + when FB::LargeUtf8 type = LargeUTF8Type.singleton - when Org::Apache::Arrow::Flatbuf::FixedSizeBinary + when FB::FixedSizeBinary type = FixedSizeBinaryType.new(fb_type.byte_width) - when Org::Apache::Arrow::Flatbuf::Decimal + when FB::Decimal case fb_type.bit_width when 128 type = Decimal128Type.new(fb_type.precision, fb_type.scale) diff --git a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb index 1258c62babe..e413b6f8e81 100644 --- a/ruby/red-arrow-format/lib/arrow-format/record-batch.rb +++ b/ruby/red-arrow-format/lib/arrow-format/record-batch.rb @@ -32,5 +32,59 @@ def to_h end hash end + + def to_flat_buffers + fb_record_batch = FB::RecordBatch::Data.new + fb_record_batch.length = @n_rows + fb_record_batch.nodes = all_columns_enumerator.collect do |array| + field_node = FB::FieldNode::Data.new + field_node.length = array.size + field_node.null_count = array.n_nulls + field_node + end + offset = 0 + fb_record_batch.buffers = all_buffers_enumerator.collect do |buffer| + fb_buffer = FB::Buffer::Data.new + fb_buffer.offset = offset + if buffer + offset += buffer.size + fb_buffer.length = buffer.size + else + fb_buffer.length = 0 + end + fb_buffer + end + # body_compression = FB::BodyCompression::Data.new + # body_compression.codec = ...
+ # fb_record_batch.compression = body_compression + fb_record_batch + end + + # Pre-order depth-first traversal + def all_columns_enumerator + Enumerator.new do |yielder| + traverse = lambda do |array| + yielder << array + if array.respond_to?(:children) + array.children.each do |child_array| + traverse.call(child_array) + end + end + end + @columns.each do |array| + traverse.call(array) + end + end + end + + def all_buffers_enumerator + Enumerator.new do |yielder| + all_columns_enumerator.each do |array| + array.each_buffer do |buffer| + yielder << buffer + end + end + end + end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/schema.rb b/ruby/red-arrow-format/lib/arrow-format/schema.rb index 5e58f68280e..841f60afad5 100644 --- a/ruby/red-arrow-format/lib/arrow-format/schema.rb +++ b/ruby/red-arrow-format/lib/arrow-format/schema.rb @@ -20,5 +20,14 @@ class Schema def initialize(fields) @fields = fields end + + def to_flat_buffers + fb_schema = FB::Schema::Data.new + fb_schema.endianness = FB::Endianness::LITTLE + fb_schema.fields = fields.collect(&:to_flat_buffers) + # fb_schema.custom_metadata = @custom_metadata + # fb_schema.features = @features + fb_schema + end end end diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb b/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb index 8682f3e826b..ffa4cb55345 100644 --- a/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb +++ b/ruby/red-arrow-format/lib/arrow-format/streaming-pull-reader.rb @@ -123,7 +123,7 @@ def consume_metadata_length(target) def consume_metadata(target) metadata_buffer = target.slice(0, @metadata_length) - @message = Org::Apache::Arrow::Flatbuf::Message.new(metadata_buffer) + @message = FB::Message.new(metadata_buffer) @body_length = @message.body_length if @body_length < 0 raise ReadError.new("Negative body length: " + @@ -174,7 +174,7 @@ def process_message(message, body) process_schema_message(message, body) when 
:initial_dictionaries header = message.header - unless header.is_a?(Org::Apache::Arrow::Flatbuf::DictionaryBatch) + unless header.is_a?(FB::DictionaryBatch) raise ReadError.new("Not a dictionary batch message: " + header.inspect) end @@ -184,9 +184,9 @@ def process_message(message, body) end when :data case message.header - when Org::Apache::Arrow::Flatbuf::DictionaryBatch + when FB::DictionaryBatch process_dictionary_batch_message(message, body) - when Org::Apache::Arrow::Flatbuf::RecordBatch + when FB::RecordBatch process_record_batch_message(message, body) end end @@ -194,7 +194,7 @@ def process_message(message, body) def process_schema_message(message, body) header = message.header - unless header.is_a?(Org::Apache::Arrow::Flatbuf::Schema) + unless header.is_a?(FB::Schema) raise ReadError.new("Not a schema message: " + header.inspect) end diff --git a/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb new file mode 100644 index 00000000000..211c0bbdb58 --- /dev/null +++ b/ruby/red-arrow-format/lib/arrow-format/streaming-writer.rb @@ -0,0 +1,97 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +require_relative "flat-buffers" + +module ArrowFormat + class StreamingWriter + include FlatBuffers::Alignable + + ALIGNMENT_SIZE = IO::Buffer.size_of(:u64) + CONTINUATION = "\xFF\xFF\xFF\xFF".b.freeze + EOS = "\xFF\xFF\xFF\xFF\x00\x00\x00\x00".b.freeze + METADATA_LARGEST_PADDING = "\x00" * 7 + + def initialize(output) + @output = output + @offset = 0 + @fb_record_batch_blocks = [] + end + + def start(schema) + write_message(build_metadata(schema.to_flat_buffers)) + # TODO: Write dictionaries + end + + def write_record_batch(record_batch) + body_length = 0 + record_batch.all_buffers_enumerator.each do |buffer| + body_length += buffer.size if buffer + end + metadata = build_metadata(record_batch.to_flat_buffers, body_length) + fb_block = FB::Block::Data.new + fb_block.offset = @offset + fb_block.meta_data_length = + CONTINUATION.bytesize + + MessagePullReader::METADATA_LENGTH_SIZE + + metadata.bytesize + fb_block.body_length = body_length + @fb_record_batch_blocks << fb_block + write_message(metadata) do + record_batch.all_buffers_enumerator.each do |buffer| + write_data(buffer) if buffer + end + end + end + + # TODO + # def write_dictionary_delta(id, dictionary) + # end + + def finish + write_data(EOS) + @output + end + + private + def write_data(data) + @output << data + @offset += data.bytesize + end + + def build_metadata(header, body_length=0) + fb_message = FB::Message::Data.new + fb_message.version = FB::MetadataVersion::V5 + fb_message.header = header + fb_message.body_length = body_length + metadata = FB::Message.serialize(fb_message) + metadata_size = metadata.bytesize + padding_size = compute_padding_size(metadata_size, ALIGNMENT_SIZE) + metadata_size += padding_size + align!(metadata, ALIGNMENT_SIZE) + metadata + end + + def write_message(metadata) + write_data(CONTINUATION) + metadata_size = metadata.bytesize + write_data([metadata_size].pack("l<")) + write_data(metadata) + yield if block_given? 
+ end + end +end diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb index ebf4ce5fa95..4f71b39187b 100644 --- a/ruby/red-arrow-format/lib/arrow-format/type.rb +++ b/ruby/red-arrow-format/lib/arrow-format/type.rb @@ -32,6 +32,10 @@ def name def build_array(size) NullArray.new(self, size) end + + def to_flat_buffers + FB::Null::Data.new + end end class BooleanType < Type @@ -55,12 +59,15 @@ class NumberType < Type class IntType < NumberType attr_reader :bit_width - attr_reader :signed def initialize(bit_width, signed) super() @bit_width = bit_width @signed = signed end + + def signed? + @signed + end end class Int8Type < IntType @@ -681,7 +688,6 @@ def build_array(size, types_buffer, children) class DictionaryType < Type attr_reader :index_type attr_reader :value_type - attr_reader :ordered def initialize(index_type, value_type, ordered) super() @index_type = index_type @@ -689,6 +695,10 @@ def initialize(index_type, value_type, ordered) @ordered = ordered end + def ordered? 
+ @ordered + end + def name "Dictionary" end diff --git a/ruby/red-arrow-format/red-arrow-format.gemspec b/ruby/red-arrow-format/red-arrow-format.gemspec index babe1466cfb..5e489f14ae5 100644 --- a/ruby/red-arrow-format/red-arrow-format.gemspec +++ b/ruby/red-arrow-format/red-arrow-format.gemspec @@ -46,7 +46,7 @@ Gem::Specification.new do |spec| spec.files += Dir.glob("lib/**/*.rb") spec.files += Dir.glob("doc/text/*") - spec.add_runtime_dependency("red-flatbuffers") + spec.add_runtime_dependency("red-flatbuffers", ">=0.0.4") github_url = "https://github.com/apache/arrow" spec.metadata = { diff --git a/ruby/red-arrow-format/test/test-reader.rb b/ruby/red-arrow-format/test/test-reader.rb index 0e59d855ce8..e0048967376 100644 --- a/ruby/red-arrow-format/test/test-reader.rb +++ b/ruby/red-arrow-format/test/test-reader.rb @@ -934,7 +934,7 @@ def setup end def read - @reader.to_a.collect do |record_batch| + @reader.collect do |record_batch| record_batch.to_h.tap do |hash| hash.each do |key, value| hash[key] = value.to_a diff --git a/ruby/red-arrow-format/test/test-writer.rb b/ruby/red-arrow-format/test/test-writer.rb new file mode 100644 index 00000000000..7acf068d9cf --- /dev/null +++ b/ruby/red-arrow-format/test/test-writer.rb @@ -0,0 +1,108 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +module WriterTests + def convert_type(red_arrow_type) + case red_arrow_type + when Arrow::NullDataType + ArrowFormat::NullType.singleton + end + end + + def convert_array(red_arrow_array) + type = convert_type(red_arrow_array.value_data_type) + case type + when ArrowFormat::NullType + type.build_array(red_arrow_array.size) + end + end + + class << self + def included(base) + base.class_eval do + sub_test_case("Null") do + def build_array + Arrow::NullArray.new(3) + end + + def test_write + assert_equal([nil, nil, nil], + @values) + end + end + end + end + end +end + +class TestFileWriter < Test::Unit::TestCase + include WriterTests + + def setup + Dir.mktmpdir do |tmp_dir| + path = File.join(tmp_dir, "data.arrow") + File.open(path, "wb") do |output| + writer = ArrowFormat::FileWriter.new(output) + red_arrow_array = build_array + array = convert_array(red_arrow_array) + fields = [ + ArrowFormat::Field.new("value", + array.type, + true, + nil), + ] + schema = ArrowFormat::Schema.new(fields) + record_batch = ArrowFormat::RecordBatch.new(schema, array.size, [array]) + writer.start(schema) + writer.write_record_batch(record_batch) + writer.finish + end + data = File.open(path, "rb", &:read).freeze + table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrow) + @values = table.value.values + end + end +end + +class TestStreamingWriter < Test::Unit::TestCase + include WriterTests + + def setup + Dir.mktmpdir do |tmp_dir| + path = File.join(tmp_dir, "data.arrows") + File.open(path, "wb") do |output| + writer = ArrowFormat::StreamingWriter.new(output) + red_arrow_array = build_array + array = convert_array(red_arrow_array) + fields = [ + ArrowFormat::Field.new("value", + array.type, + true, + nil), + ] + schema = ArrowFormat::Schema.new(fields) + record_batch = ArrowFormat::RecordBatch.new(schema, array.size, [array]) + writer.start(schema) + 
writer.write_record_batch(record_batch) + writer.finish + end + data = File.open(path, "rb", &:read).freeze + table = Arrow::Table.load(Arrow::Buffer.new(data), format: :arrows) + @values = table.value.values + end + end +end diff --git a/ruby/red-arrow/lib/arrow/column.rb b/ruby/red-arrow/lib/arrow/column.rb index e4b0cf4eee7..26be4f448d9 100644 --- a/ruby/red-arrow/lib/arrow/column.rb +++ b/ruby/red-arrow/lib/arrow/column.rb @@ -42,6 +42,10 @@ def to_arrow_chunked_array @data.to_arrow_chunked_array end + def values + @data.values + end + def name @field.name end