Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion ruby/red-arrow-format/Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ task :test do
end
end

namespace :flatbuffers do
namespace :flat_buffers do
desc "Generate FlatBuffers code"
task :generate do
Dir.mktmpdir do |tmp_dir|
Expand Down
2 changes: 2 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,5 +16,7 @@
# under the License.

require_relative "arrow-format/file-reader"
require_relative "arrow-format/file-writer"
require_relative "arrow-format/streaming-reader"
require_relative "arrow-format/streaming-writer"
require_relative "arrow-format/version"
62 changes: 42 additions & 20 deletions ruby/red-arrow-format/lib/arrow-format/array.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,18 +31,32 @@ def initialize(type, size, validity_buffer)

def valid?(i)
return true if @validity_buffer.nil?
(@validity_buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0
validity_bitmap[i] == 1
end

def null?(i)
not valid?(i)
end

def n_nulls
if @validity_buffer.nil?
0
else
# TODO: popcount
validity_bitmap.count do |bit|
bit == 1
end
end
end

private
def validity_bitmap
@validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
end

def apply_validity(array)
return array if @validity_buffer.nil?
@validity_bitmap ||= Bitmap.new(@validity_buffer, @size)
@validity_bitmap.each_with_index do |bit, i|
validity_bitmap.each_with_index do |bit, i|
array[i] = nil if bit.zero?
end
array
Expand All @@ -54,17 +68,30 @@ def initialize(type, size)
super(type, size, nil)
end

def each_buffer
return to_enum(__method__) unless block_given?
end

def to_a
[nil] * @size
end
end

class BooleanArray < Array
class PrimitiveArray < Array
def initialize(type, size, validity_buffer, values_buffer)
super(type, size, validity_buffer)
@values_buffer = values_buffer
end

def each_buffer
return to_enum(__method__) unless block_given?

yield(@validity_buffer)
yield(@values_buffer)
end
end

class BooleanArray < PrimitiveArray
def to_a
@values_bitmap ||= Bitmap.new(@values_buffer, @size)
values = @values_bitmap.each.collect do |bit|
Expand All @@ -74,12 +101,7 @@ def to_a
end
end

class IntArray < Array
def initialize(type, size, validity_buffer, values_buffer)
super(type, size, validity_buffer)
@values_buffer = values_buffer
end

class IntArray < PrimitiveArray
def to_a
apply_validity(@values_buffer.values(@type.buffer_type, 0, @size))
end
Expand Down Expand Up @@ -109,11 +131,7 @@ class Int64Array < IntArray
class UInt64Array < IntArray
end

class FloatingPointArray < Array
def initialize(type, size, validity_buffer, values_buffer)
super(type, size, validity_buffer)
@values_buffer = values_buffer
end
class FloatingPointArray < PrimitiveArray
end

class Float32Array < FloatingPointArray
Expand All @@ -128,11 +146,7 @@ def to_a
end
end

class TemporalArray < Array
def initialize(type, size, validity_buffer, values_buffer)
super(type, size, validity_buffer)
@values_buffer = values_buffer
end
class TemporalArray < PrimitiveArray
end

class DateArray < TemporalArray
Expand Down Expand Up @@ -217,6 +231,14 @@ def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
@values_buffer = values_buffer
end

def each_buffer
return to_enum(__method__) unless block_given?

yield(@validity_buffer)
yield(@offsets_buffer)
yield(@values_buffer)
end

def to_a
values = @offsets_buffer.
each(buffer_type, 0, @size + 1).
Expand Down
4 changes: 4 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/bitmap.rb
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,10 @@ def initialize(buffer, n_values)
@n_values = n_values
end

def [](i)
(@validity_buffer.get_value(:U8, i / 8) & (1 << (i % 8))) > 0
end

def each
return to_enum(__method__) unless block_given?

Expand Down
26 changes: 26 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/field.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,31 @@ def initialize(name, type, nullable, dictionary_id)
def nullable?
@nullable
end

def to_flat_buffers
fb_field = FB::Field::Data.new
fb_field.name = @name
fb_field.nullable = @nullable
if @type.is_a?(DictionaryType)
fb_field.type = @type.value_type.to_flat_buffers
dictionary_encoding = FB::DictionaryEncoding::Data.new
dictionary_encoding.id = @dictionary_id
int = FB::Int::Data.new
int.bit_width = @type.index_type.bit_width
int.signed = @type.index_type.signed?
dictionary_encoding.index_type = int
dictionary_encoding.ordered = @type.ordered?
dictionary_encoding.dictionary_kind =
FB::DictionaryKind::DENSE_ARRAY
fb_field.dictionary = dictionary
else
fb_field.type = @type.to_flat_buffers
end
if @type.respond_to?(:children)
fb_field.children = @type.children.collect(&:to_flat_buffers)
end
# fb_field.custom_metadata = @custom_metadata
fb_field
end
end
end
23 changes: 10 additions & 13 deletions ruby/red-arrow-format/lib/arrow-format/file-reader.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,6 @@

require_relative "streaming-reader"

require_relative "org/apache/arrow/flatbuf/block"
require_relative "org/apache/arrow/flatbuf/footer"

module ArrowFormat
class FileReader
include Enumerable
Expand Down Expand Up @@ -59,9 +56,9 @@ def n_record_batches
end

def read(i)
fb_message, body = read_block(@record_batch_blocks[i])
fb_message, body = read_block(@record_batch_blocks[i], :record_batch, i)
fb_header = fb_message.header
unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::RecordBatch)
unless fb_header.is_a?(FB::RecordBatch)
raise FileReadError.new(@buffer,
"Not a record batch message: #{i}: " +
fb_header.class.name)
Expand Down Expand Up @@ -104,10 +101,10 @@ def read_footer
footer_size = @buffer.get_value(FOOTER_SIZE_FORMAT, footer_size_offset)
footer_data = @buffer.slice(footer_size_offset - footer_size,
footer_size)
Org::Apache::Arrow::Flatbuf::Footer.new(footer_data)
FB::Footer.new(footer_data)
end

def read_block(block)
def read_block(block, type, i)
offset = block.offset

# If we can report property error information, we can use
Expand All @@ -127,7 +124,7 @@ def read_block(block)
continuation = @buffer.slice(offset, continuation_size)
unless continuation == CONTINUATION_BUFFER
raise FileReadError.new(@buffer,
"Invalid continuation: #{i}: " +
"Invalid continuation: #{type}: #{i}: " +
continuation.inspect)
end
offset += continuation_size
Expand All @@ -141,14 +138,14 @@ def read_block(block)
metadata_length_size
unless metadata_length == expected_metadata_length
raise FileReadError.new(@buffer,
"Invalid metadata length #{i}: " +
"Invalid metadata length: #{type}: #{i}: " +
"expected:#{expected_metadata_length} " +
"actual:#{metadata_length}")
end
offset += metadata_length_size

metadata = @buffer.slice(offset, metadata_length)
fb_message = Org::Apache::Arrow::Flatbuf::Message.new(metadata)
fb_message = FB::Message.new(metadata)
offset += metadata_length

body = @buffer.slice(offset, block.body_length)
Expand All @@ -167,10 +164,10 @@ def read_dictionaries
end

dictionaries = {}
dictionary_blocks.each do |block|
fb_message, body = read_block(block)
dictionary_blocks.each_with_index do |block, i|
fb_message, body = read_block(block, :dictionary_block, i)
fb_header = fb_message.header
unless fb_header.is_a?(Org::Apache::Arrow::Flatbuf::DictionaryBatch)
unless fb_header.is_a?(FB::DictionaryBatch)
raise FileReadError.new(@buffer,
"Not a dictionary batch message: " +
fb_header.inspect)
Expand Down
56 changes: 56 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/file-writer.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

require_relative "streaming-writer"

module ArrowFormat
class FileWriter < StreamingWriter
MAGIC = "ARROW1".b
MAGIC_PADDING = "\x00\x00"

def start(schema)
@fb_schema = schema.to_flat_buffers
write_data(MAGIC)
write_data(MAGIC_PADDING)
super
end

def finish
super
write_footer
write_data(MAGIC)
@output
end

private
def build_footer
fb_footer = FB::Footer::Data.new
fb_footer.version = FB::MetadataVersion::V5
fb_footer.schema = @fb_schema
# fb_footer.dictionaries = ... # TODO
fb_footer.record_batches = @fb_record_batch_blocks
# fb_footer.custom_metadata = ... # TODO
FB::Footer.serialize(fb_footer)
end

def write_footer
footer = build_footer
write_data(footer)
write_data([footer.bytesize].pack("l<"))
end
end
end
53 changes: 53 additions & 0 deletions ruby/red-arrow-format/lib/arrow-format/flat-buffers.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

require_relative "org/apache/arrow/flatbuf/binary"
require_relative "org/apache/arrow/flatbuf/block"
require_relative "org/apache/arrow/flatbuf/bool"
require_relative "org/apache/arrow/flatbuf/date"
require_relative "org/apache/arrow/flatbuf/date_unit"
require_relative "org/apache/arrow/flatbuf/decimal"
require_relative "org/apache/arrow/flatbuf/dictionary_encoding"
require_relative "org/apache/arrow/flatbuf/dictionary_batch"
require_relative "org/apache/arrow/flatbuf/duration"
require_relative "org/apache/arrow/flatbuf/fixed_size_binary"
require_relative "org/apache/arrow/flatbuf/floating_point"
require_relative "org/apache/arrow/flatbuf/footer"
require_relative "org/apache/arrow/flatbuf/int"
require_relative "org/apache/arrow/flatbuf/interval"
require_relative "org/apache/arrow/flatbuf/interval_unit"
require_relative "org/apache/arrow/flatbuf/large_binary"
require_relative "org/apache/arrow/flatbuf/large_list"
require_relative "org/apache/arrow/flatbuf/large_utf8"
require_relative "org/apache/arrow/flatbuf/list"
require_relative "org/apache/arrow/flatbuf/map"
require_relative "org/apache/arrow/flatbuf/message"
require_relative "org/apache/arrow/flatbuf/null"
require_relative "org/apache/arrow/flatbuf/precision"
require_relative "org/apache/arrow/flatbuf/record_batch"
require_relative "org/apache/arrow/flatbuf/schema"
require_relative "org/apache/arrow/flatbuf/struct_"
require_relative "org/apache/arrow/flatbuf/time"
require_relative "org/apache/arrow/flatbuf/time_unit"
require_relative "org/apache/arrow/flatbuf/timestamp"
require_relative "org/apache/arrow/flatbuf/union"
require_relative "org/apache/arrow/flatbuf/union_mode"
require_relative "org/apache/arrow/flatbuf/utf8"

module ArrowFormat
FB = Org::Apache::Arrow::Flatbuf
end

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading
Loading