diff --git a/ruby/red-arrow-format/lib/arrow-format/array.rb b/ruby/red-arrow-format/lib/arrow-format/array.rb
index 23969bc24bd..be10989c33f 100644
--- a/ruby/red-arrow-format/lib/arrow-format/array.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/array.rb
@@ -183,6 +183,12 @@ def to_a
     end
   end
 
+  class TimestampArray < TemporalArray
+    def to_a
+      apply_validity(@values_buffer.values(:s64, 0, @size))
+    end
+  end
+
   class VariableSizeBinaryLayoutArray < Array
     def initialize(type, size, validity_buffer, offsets_buffer, values_buffer)
       super(type, size, validity_buffer)
diff --git a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
index 8b149cd1756..cd20f437650 100644
--- a/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/file-reader.rb
@@ -39,6 +39,7 @@
 require_relative "org/apache/arrow/flatbuf/schema"
 require_relative "org/apache/arrow/flatbuf/struct_"
 require_relative "org/apache/arrow/flatbuf/time"
+require_relative "org/apache/arrow/flatbuf/timestamp"
 require_relative "org/apache/arrow/flatbuf/time_unit"
 require_relative "org/apache/arrow/flatbuf/union"
 require_relative "org/apache/arrow/flatbuf/union_mode"
@@ -208,6 +209,9 @@ def read_field(fb_field)
             type = Time64Type.new(:nanosecond)
           end
         end
+      when Org::Apache::Arrow::Flatbuf::Timestamp
+        unit = fb_type.unit.name.downcase.to_sym
+        type = TimestampType.new(unit, fb_type.timezone)
       when Org::Apache::Arrow::Flatbuf::List
         type = ListType.new(read_field(fb_field.children[0]))
       when Org::Apache::Arrow::Flatbuf::LargeList
diff --git a/ruby/red-arrow-format/lib/arrow-format/type.rb b/ruby/red-arrow-format/lib/arrow-format/type.rb
index 055c0890c4b..fe74d5fa046 100644
--- a/ruby/red-arrow-format/lib/arrow-format/type.rb
+++ b/ruby/red-arrow-format/lib/arrow-format/type.rb
@@ -274,12 +274,16 @@ def build_array(size, validity_buffer, values_buffer)
   end
 
   class TimeType < TemporalType
+    attr_reader :unit
+    def initialize(name, unit)
+      super(name)
+      @unit = unit
+    end
   end
 
   class Time32Type < TimeType
     def initialize(unit)
-      super("Time32")
-      @unit = unit
+      super("Time32", unit)
     end
 
     def build_array(size, validity_buffer, values_buffer)
@@ -289,8 +293,7 @@ def build_array(size, validity_buffer, values_buffer)
 
   class Time64Type < TimeType
     def initialize(unit)
-      super("Time64")
-      @unit = unit
+      super("Time64", unit)
     end
 
     def build_array(size, validity_buffer, values_buffer)
@@ -298,6 +301,20 @@ def build_array(size, validity_buffer, values_buffer)
     end
   end
 
+  class TimestampType < TemporalType
+    attr_reader :unit
+    attr_reader :timezone
+    def initialize(unit, timezone)
+      super("Timestamp")
+      @unit = unit
+      @timezone = timezone
+    end
+
+    def build_array(size, validity_buffer, values_buffer)
+      TimestampArray.new(self, size, validity_buffer, values_buffer)
+    end
+  end
+
   class VariableSizeBinaryType < Type
   end
 
diff --git a/ruby/red-arrow-format/test/test-file-reader.rb b/ruby/red-arrow-format/test/test-file-reader.rb
index b31e8940458..b6634d66691 100644
--- a/ruby/red-arrow-format/test/test-file-reader.rb
+++ b/ruby/red-arrow-format/test/test-file-reader.rb
@@ -40,6 +40,10 @@ def read
     end
   end
 
+  def type
+    @type ||= @reader.first.schema.fields[0].type
+  end
+
   sub_test_case("Null") do
     def build_array
       Arrow::NullArray.new(3)
@@ -245,6 +249,10 @@ def test_read
       assert_equal([{"value" => [@time_00_00_10, nil, @time_00_01_10]}],
                    read)
     end
+
+    def test_type
+      assert_equal(:second, type.unit)
+    end
   end
 
   sub_test_case("Time32(:millisecond)") do
@@ -263,6 +271,10 @@ def test_read
       assert_equal([{"value" => [@time_00_00_10_000, nil, @time_00_01_10_000]}],
                    read)
     end
+
+    def test_type
+      assert_equal(:millisecond, type.unit)
+    end
   end
 
   sub_test_case("Time64(:microsecond)") do
@@ -293,6 +305,10 @@ def test_read
                    ],
                    read)
     end
+
+    def test_type
+      assert_equal(:microsecond, type.unit)
+    end
   end
 
   sub_test_case("Time64(:nanosecond)") do
@@ -323,6 +339,154 @@ def test_read
                    ],
                    read)
     end
+
+    def test_type
+      assert_equal(:nanosecond, type.unit)
+    end
+  end
+
+  sub_test_case("Timestamp(:second)") do
+    def setup(&block)
+      @timestamp_2019_11_18_00_09_11 = 1574003351
+      @timestamp_2025_12_16_05_33_58 = 1765863238
+      super(&block)
+    end
+
+    def build_array
+      Arrow::TimestampArray.new(:second,
+                                [
+                                  @timestamp_2019_11_18_00_09_11,
+                                  nil,
+                                  @timestamp_2025_12_16_05_33_58,
+                                ])
+    end
+
+    def test_read
+      assert_equal([
+                     {
+                       "value" => [
+                         @timestamp_2019_11_18_00_09_11,
+                         nil,
+                         @timestamp_2025_12_16_05_33_58,
+                       ],
+                     },
+                   ],
+                   read)
+    end
+  end
+
+  sub_test_case("Timestamp(:millisecond)") do
+    def setup(&block)
+      @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000
+      @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000
+      super(&block)
+    end
+
+    def build_array
+      Arrow::TimestampArray.new(:milli,
+                                [
+                                  @timestamp_2019_11_18_00_09_11,
+                                  nil,
+                                  @timestamp_2025_12_16_05_33_58,
+                                ])
+    end
+
+    def test_read
+      assert_equal([
+                     {
+                       "value" => [
+                         @timestamp_2019_11_18_00_09_11,
+                         nil,
+                         @timestamp_2025_12_16_05_33_58,
+                       ],
+                     },
+                   ],
+                   read)
+    end
+  end
+
+  sub_test_case("Timestamp(:microsecond)") do
+    def setup(&block)
+      @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000
+      @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000
+      super(&block)
+    end
+
+    def build_array
+      Arrow::TimestampArray.new(:micro,
+                                [
+                                  @timestamp_2019_11_18_00_09_11,
+                                  nil,
+                                  @timestamp_2025_12_16_05_33_58,
+                                ])
+    end
+
+    def test_read
+      assert_equal([
+                     {
+                       "value" => [
+                         @timestamp_2019_11_18_00_09_11,
+                         nil,
+                         @timestamp_2025_12_16_05_33_58,
+                       ],
+                     },
+                   ],
+                   read)
+    end
+  end
+
+  sub_test_case("Timestamp(:nanosecond)") do
+    def setup(&block)
+      @timestamp_2019_11_18_00_09_11 = 1574003351 * 1_000_000_000
+      @timestamp_2025_12_16_05_33_58 = 1765863238 * 1_000_000_000
+      super(&block)
+    end
+
+    def build_array
+      Arrow::TimestampArray.new(:nano,
+                                [
+                                  @timestamp_2019_11_18_00_09_11,
+                                  nil,
+                                  @timestamp_2025_12_16_05_33_58,
+                                ])
+    end
+
+    def test_read
+      assert_equal([
+                     {
+                       "value" => [
+                         @timestamp_2019_11_18_00_09_11,
+                         nil,
+                         @timestamp_2025_12_16_05_33_58,
+                       ],
+                     },
+                   ],
+                   read)
+    end
+  end
+
+  sub_test_case("Timestamp(timezone)") do
+    def setup(&block)
+      @timezone = "UTC"
+      @timestamp_2019_11_18_00_09_11 = 1574003351
+      @timestamp_2025_12_16_05_33_58 = 1765863238
+      super(&block)
+    end
+
+    def build_array
+      data_type = Arrow::TimestampDataType.new(:second, @timezone)
+      Arrow::TimestampArray.new(data_type,
+                                [
+                                  @timestamp_2019_11_18_00_09_11,
+                                  nil,
+                                  @timestamp_2025_12_16_05_33_58,
+                                ])
+    end
+
+    def test_type
+      assert_equal([:second, @timezone],
+                   [type.unit, type.timezone])
+    end
   end
 
   sub_test_case("Binary") do