From 8abfeb3e4cec70bd1a7f5746e3847059372bc346 Mon Sep 17 00:00:00 2001 From: jmestwa-coder Date: Mon, 25 May 2026 20:00:23 +0530 Subject: [PATCH 1/2] MINOR: [C++][ORC] Avoid signed overflow when converting timestamps --- cpp/src/arrow/adapters/orc/util.cc | 36 ++++++++++++++++-------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/cpp/src/arrow/adapters/orc/util.cc b/cpp/src/arrow/adapters/orc/util.cc index 6974faae59b..91260302abd 100644 --- a/cpp/src/arrow/adapters/orc/util.cc +++ b/cpp/src/arrow/adapters/orc/util.cc @@ -31,6 +31,7 @@ #include "arrow/util/bitmap_ops.h" #include "arrow/util/checked_cast.h" #include "arrow/util/decimal.h" +#include "arrow/util/int_util_overflow.h" #include "arrow/util/key_value_metadata.h" #include "arrow/util/range.h" #include "arrow/util/string.h" @@ -200,26 +201,27 @@ Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch, auto builder = checked_cast(abuilder); auto batch = checked_cast(column_vector_batch); - if (length == 0) { - return Status::OK(); - } - - const uint8_t* valid_bytes = nullptr; - if (batch->hasNulls) { - valid_bytes = reinterpret_cast(batch->notNull.data()) + offset; - } - const int64_t* seconds = batch->data.data() + offset; const int64_t* nanos = batch->nanoseconds.data() + offset; - auto transform_timestamp = [seconds, nanos](int64_t index) { - return seconds[index] * kOneSecondNanos + nanos[index]; - }; - - auto transform_range = internal::MakeLazyRange(transform_timestamp, length); - - RETURN_NOT_OK( - builder->AppendValues(transform_range.begin(), transform_range.end(), valid_bytes)); + const bool has_nulls = batch->hasNulls; + RETURN_NOT_OK(builder->Reserve(length)); + for (int64_t i = 0; i < length; i++) { + if (has_nulls && !batch->notNull[offset + i]) { + builder->UnsafeAppendNull(); + continue; + } + // A timestamp past ~year 2262 does not fit in int64 nanoseconds; computing + // it with a bare `seconds * kOneSecondNanos` is signed overflow. + int64_t value; + if (ARROW_PREDICT_FALSE( + internal::MultiplyWithOverflow(seconds[i], kOneSecondNanos, &value) || + internal::AddWithOverflow(value, nanos[i], &value))) { + return Status::Invalid("ORC timestamp (", seconds[i], "s + ", nanos[i], + "ns) is out of range for nanosecond resolution"); + } + builder->UnsafeAppend(value); + } return Status::OK(); } From 21f3c225d7f07e67c74293b40b6ffc71ebb689eb Mon Sep 17 00:00:00 2001 From: jmestwa-coder Date: Tue, 2 Jun 2026 23:31:19 +0530 Subject: [PATCH 2/2] Add test for out-of-range ORC timestamp conversion --- cpp/src/arrow/adapters/orc/adapter_test.cc | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cpp/src/arrow/adapters/orc/adapter_test.cc b/cpp/src/arrow/adapters/orc/adapter_test.cc index 714e61b22b1..4409d277b1d 100644 --- a/cpp/src/arrow/adapters/orc/adapter_test.cc +++ b/cpp/src/arrow/adapters/orc/adapter_test.cc @@ -1114,6 +1114,31 @@ TEST(TestWriteReadORCBatch, DenseUnionConversion) { TestUnionConversion(std::move(array)); } +TEST(TestWriteReadORCBatch, TimestampOutOfRangeIsRejected) { + // A timestamp far past year 2262 does not fit in int64 nanoseconds, so scaling + // seconds to nanoseconds overflows. The conversion must report it rather than + // produce a garbage value. + auto orc_type = liborc::Type::buildTypeFromString("struct"); + + MemoryOutputStream mem_stream(kDefaultSmallMemStreamSize); + auto writer = CreateWriter(/*stripe_size=*/1024, *orc_type, &mem_stream); + auto orc_batch = writer->createRowBatch(1); + + auto struct_batch = internal::checked_cast(orc_batch.get()); + auto ts_batch = + internal::checked_cast(struct_batch->fields[0]); + ts_batch->data[0] = 10000000000; // ~year 2286, overflows once scaled to nanos + ts_batch->nanoseconds[0] = 0; + ts_batch->numElements = 1; + ts_batch->hasNulls = false; + struct_batch->numElements = 1; + + ASSIGN_OR_ABORT(auto builder, MakeBuilder(timestamp(TimeUnit::NANO))); + ASSERT_RAISES(Invalid, + adapters::orc::AppendBatch(orc_type->getSubtype(0), struct_batch->fields[0], + /*offset=*/0, /*length=*/1, builder.get())); +} + class TestORCWriterMultipleWrite : public ::testing::Test { public: TestORCWriterMultipleWrite() : rand(kRandomSeed) {}