Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions cpp/src/arrow/adapters/orc/adapter_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1114,6 +1114,31 @@ TEST(TestWriteReadORCBatch, DenseUnionConversion) {
TestUnionConversion(std::move(array));
}

// Verifies that converting an ORC timestamp whose value cannot be represented
// as int64 nanoseconds is reported as Status::Invalid rather than silently
// producing a garbage value.
TEST(TestWriteReadORCBatch, TimestampOutOfRangeIsRejected) {
  // 10^10 seconds after the epoch is around year 2286, which is past the
  // ~year 2262 limit of int64 nanoseconds: scaling it to nanos overflows.
  constexpr int64_t kOverflowingSeconds = 10000000000;

  auto schema = liborc::Type::buildTypeFromString("struct<ts:timestamp>");

  // The writer is used here only to obtain a row batch matching the schema.
  MemoryOutputStream sink(kDefaultSmallMemStreamSize);
  auto orc_writer = CreateWriter(/*stripe_size=*/1024, *schema, &sink);
  auto row_batch = orc_writer->createRowBatch(1);

  auto root = internal::checked_cast<liborc::StructVectorBatch*>(row_batch.get());
  liborc::ColumnVectorBatch* ts_column = root->fields[0];
  auto timestamps = internal::checked_cast<liborc::TimestampVectorBatch*>(ts_column);

  // Fill in a single non-null row holding the out-of-range value.
  timestamps->data[0] = kOverflowingSeconds;
  timestamps->nanoseconds[0] = 0;
  timestamps->numElements = 1;
  timestamps->hasNulls = false;
  root->numElements = 1;

  ASSIGN_OR_ABORT(auto ts_builder, MakeBuilder(timestamp(TimeUnit::NANO)));
  ASSERT_RAISES(Invalid,
                adapters::orc::AppendBatch(schema->getSubtype(0), ts_column,
                                           /*offset=*/0, /*length=*/1, ts_builder.get()));
}

class TestORCWriterMultipleWrite : public ::testing::Test {
public:
TestORCWriterMultipleWrite() : rand(kRandomSeed) {}
Expand Down
36 changes: 19 additions & 17 deletions cpp/src/arrow/adapters/orc/util.cc
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
#include "arrow/util/bitmap_ops.h"
#include "arrow/util/checked_cast.h"
#include "arrow/util/decimal.h"
#include "arrow/util/int_util_overflow.h"
#include "arrow/util/key_value_metadata.h"
#include "arrow/util/range.h"
#include "arrow/util/string.h"
Expand Down Expand Up @@ -200,26 +201,27 @@ Status AppendTimestampBatch(liborc::ColumnVectorBatch* column_vector_batch,
auto builder = checked_cast<TimestampBuilder*>(abuilder);
auto batch = checked_cast<liborc::TimestampVectorBatch*>(column_vector_batch);

if (length == 0) {
return Status::OK();
}

const uint8_t* valid_bytes = nullptr;
if (batch->hasNulls) {
valid_bytes = reinterpret_cast<const uint8_t*>(batch->notNull.data()) + offset;
}

const int64_t* seconds = batch->data.data() + offset;
const int64_t* nanos = batch->nanoseconds.data() + offset;

auto transform_timestamp = [seconds, nanos](int64_t index) {
return seconds[index] * kOneSecondNanos + nanos[index];
};

auto transform_range = internal::MakeLazyRange(transform_timestamp, length);

RETURN_NOT_OK(
builder->AppendValues(transform_range.begin(), transform_range.end(), valid_bytes));
const bool has_nulls = batch->hasNulls;
RETURN_NOT_OK(builder->Reserve(length));
for (int64_t i = 0; i < length; i++) {
if (has_nulls && !batch->notNull[offset + i]) {
builder->UnsafeAppendNull();
continue;
}
// A timestamp outside roughly years 1677-2262 does not fit in int64
// nanoseconds; computing it with a bare `seconds * kOneSecondNanos` would be
// signed integer overflow, which is undefined behavior.
int64_t value;
if (ARROW_PREDICT_FALSE(
internal::MultiplyWithOverflow(seconds[i], kOneSecondNanos, &value) ||
internal::AddWithOverflow(value, nanos[i], &value))) {
return Status::Invalid("ORC timestamp (", seconds[i], "s + ", nanos[i],
"ns) is out of range for nanosecond resolution");
}
builder->UnsafeAppend(value);
}
return Status::OK();
}

Expand Down