diff --git a/ext/bson/read.c b/ext/bson/read.c index 36d96f353..c03170625 100644 --- a/ext/bson/read.c +++ b/ext/bson/read.c @@ -27,15 +27,31 @@ static VALUE pvt_get_double(byte_buffer_t *b); static VALUE pvt_get_string(byte_buffer_t *b, const char *data_type); static VALUE pvt_get_symbol(byte_buffer_t *b, VALUE rb_buffer, int argc, VALUE *argv); static VALUE pvt_get_boolean(byte_buffer_t *b); -static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv); +static VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv, int depth); +static VALUE pvt_get_hash_at_depth(int argc, VALUE *argv, VALUE self, int depth); +static VALUE pvt_get_array_at_depth(int argc, VALUE *argv, VALUE self, int depth); +static void pvt_check_nesting_depth(int depth); static void pvt_skip_cstring(byte_buffer_t *b); static size_t pvt_strnlen(const byte_buffer_t *b); +/* Maximum number of nested BSON documents or arrays the decoder will accept. + * Mirrors BSON::MAX_NESTING_DEPTH in lib/bson.rb. */ +#define BSON_RUBY_MAX_NESTING_DEPTH 200 + void pvt_raise_decode_error(volatile VALUE msg) { VALUE klass = pvt_const_get_3("BSON", "Error", "BSONDecodeError"); rb_exc_raise(rb_exc_new_str(klass, msg)); } +/* Raise BSON::Error::BSONDecodeError if the depth exceeds the cap. */ +void pvt_check_nesting_depth(int depth) { + if (depth > BSON_RUBY_MAX_NESTING_DEPTH) { + pvt_raise_decode_error(rb_sprintf( + "BSON document nesting depth exceeds maximum of %d", + BSON_RUBY_MAX_NESTING_DEPTH)); + } +} + /** * validate the buffer contains the amount of bytes the array / hash claimns * and that it is null terminated @@ -66,9 +82,10 @@ int32_t pvt_validate_length(byte_buffer_t *b) } /** - * Read a single field from a hash or array + * Read a single field from a hash or array. `depth` is the current nesting + * depth; nested documents/arrays bump it before recursing. */ -VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv) +VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, VALUE *argv, int depth) { switch(type) { case BSON_TYPE_INT32: return pvt_get_int32(b); @@ -76,8 +93,8 @@ VALUE pvt_read_field(byte_buffer_t *b, VALUE rb_buffer, uint8_t type, int argc, case BSON_TYPE_DOUBLE: return pvt_get_double(b); case BSON_TYPE_STRING: return pvt_get_string(b, "String"); case BSON_TYPE_SYMBOL: return pvt_get_symbol(b, rb_buffer, argc, argv); - case BSON_TYPE_ARRAY: return rb_bson_byte_buffer_get_array(argc, argv, rb_buffer); - case BSON_TYPE_DOCUMENT: return rb_bson_byte_buffer_get_hash(argc, argv, rb_buffer); + case BSON_TYPE_ARRAY: return pvt_get_array_at_depth(argc, argv, rb_buffer, depth + 1); + case BSON_TYPE_DOCUMENT: return pvt_get_hash_at_depth(argc, argv, rb_buffer, depth + 1); case BSON_TYPE_BOOLEAN: return pvt_get_boolean(b); default: { @@ -397,6 +414,10 @@ static int pvt_is_dbref(VALUE doc) { } VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ + return pvt_get_hash_at_depth(argc, argv, self, 1); +} + +VALUE pvt_get_hash_at_depth(int argc, VALUE *argv, VALUE self, int depth){ VALUE doc = Qnil; byte_buffer_t *b = NULL; uint8_t type; @@ -404,6 +425,8 @@ VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ int32_t length; char *start_ptr; + pvt_check_nesting_depth(depth); + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); start_ptr = READ_PTR(b); @@ -413,7 +436,7 @@ VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ while((type = pvt_get_type_byte(b)) != 0){ VALUE field = rb_bson_byte_buffer_get_cstring(self); - rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv)); + rb_hash_aset(doc, field, pvt_read_field(b, self, type, argc, argv, depth)); RB_GC_GUARD(field); } @@ -430,12 +453,18 @@ VALUE rb_bson_byte_buffer_get_hash(int argc, VALUE *argv, VALUE self){ } VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){ + return pvt_get_array_at_depth(argc, argv, self, 1); +} + +VALUE pvt_get_array_at_depth(int argc, VALUE *argv, VALUE self, int depth){ byte_buffer_t *b; VALUE array = Qnil; uint8_t type; int32_t length; char *start_ptr; + pvt_check_nesting_depth(depth); + TypedData_Get_Struct(self, byte_buffer_t, &rb_byte_buffer_data_type, b); start_ptr = READ_PTR(b); @@ -444,7 +473,7 @@ VALUE rb_bson_byte_buffer_get_array(int argc, VALUE *argv, VALUE self){ array = rb_ary_new(); while((type = pvt_get_type_byte(b)) != 0){ pvt_skip_cstring(b); - rb_ary_push(array, pvt_read_field(b, self, type, argc, argv)); + rb_ary_push(array, pvt_read_field(b, self, type, argc, argv, depth)); } RB_GC_GUARD(array); diff --git a/lib/bson.rb b/lib/bson.rb index 2148abfd4..15a54f373 100644 --- a/lib/bson.rb +++ b/lib/bson.rb @@ -56,6 +56,29 @@ def self.ObjectId(string) # # @since 2.0.0 UTF8 = "UTF-8" + + # Maximum number of nested BSON documents or arrays the decoder will accept. + # Prevents stack-overflow DoS on adversarial input. Matches libbson and the + # Go driver's ExtJSON parser. + MAX_NESTING_DEPTH = 200 + + # Bump the per-thread BSON decode nesting counter and raise if it exceeds + # MAX_NESTING_DEPTH. Pair every call with `leave_nesting_depth` in an + # `ensure` block. The bump is inlined at each callsite (rather than a + # block-yielding helper) to keep JRuby JVM stack frame counts low enough + # that the check fires before the JVM stack overflows on adversarial input. + def self.enter_nesting_depth + depth = (Thread.current[:_bson_nesting_depth] ||= 0) + 1 + if depth > MAX_NESTING_DEPTH + raise Error::BSONDecodeError, + "BSON document nesting depth exceeds maximum of #{MAX_NESTING_DEPTH}" + end + Thread.current[:_bson_nesting_depth] = depth + end + + def self.leave_nesting_depth + Thread.current[:_bson_nesting_depth] -= 1 + end end require "bson/config" diff --git a/lib/bson/array.rb b/lib/bson/array.rb index fe0f8300a..86b30f753 100644 --- a/lib/bson/array.rb +++ b/lib/bson/array.rb @@ -143,15 +143,20 @@ def from_bson(buffer, **options) # @raise [ BSON::Error::BSONDecodeError ] if the expected number of # bytes were not read from the buffer def parse_array_from_buffer(buffer, **options) - new.tap do |array| - start_position = buffer.read_position - expected_byte_size = buffer.get_int32 - parse_array_elements_from_buffer(array, buffer, **options) - actual_byte_size = buffer.read_position - start_position - if actual_byte_size != expected_byte_size - raise Error::BSONDecodeError, - "Expected array to take #{expected_byte_size} bytes but it took #{actual_byte_size} bytes" + BSON.enter_nesting_depth + begin + new.tap do |array| + start_position = buffer.read_position + expected_byte_size = buffer.get_int32 + parse_array_elements_from_buffer(array, buffer, **options) + actual_byte_size = buffer.read_position - start_position + if actual_byte_size != expected_byte_size + raise Error::BSONDecodeError, + "Expected array to take #{expected_byte_size} bytes but it took #{actual_byte_size} bytes" + end end + ensure + BSON.leave_nesting_depth end end diff --git a/lib/bson/ext_json.rb b/lib/bson/ext_json.rb index b522e6f34..23772e69d 100644 --- a/lib/bson/ext_json.rb +++ b/lib/bson/ext_json.rb @@ -115,8 +115,13 @@ module ExtJSON when Hash parse_hash(value, **options) when Array - value.map do |item| - parse_obj(item, **options) + BSON.enter_nesting_depth + begin + value.map do |item| + parse_obj(item, **options) + end + ensure + BSON.leave_nesting_depth end else raise Error::ExtJSONParseError, "Unknown value type: #{value}" @@ -136,6 +141,15 @@ module ExtJSON end].freeze module_function def parse_hash(hash, **options) + BSON.enter_nesting_depth + begin + return parse_hash_body(hash, **options) + ensure + BSON.leave_nesting_depth + end + end + + module_function def parse_hash_body(hash, **options) if hash.empty? return {} end diff --git a/lib/bson/hash.rb b/lib/bson/hash.rb index 197349cc5..8fe0c41a5 100644 --- a/lib/bson/hash.rb +++ b/lib/bson/hash.rb @@ -167,17 +167,22 @@ def maybe_dbref(hash) # # @return [ Hash ] the hash parsed from the buffer def parse_hash_from_buffer(buffer, **options) - hash = Document.allocate - start_position = buffer.read_position - expected_byte_size = buffer.get_int32 + BSON.enter_nesting_depth + begin + hash = Document.allocate + start_position = buffer.read_position + expected_byte_size = buffer.get_int32 - parse_hash_contents(hash, buffer, **options) + parse_hash_contents(hash, buffer, **options) - actual_byte_size = buffer.read_position - start_position - return hash unless actual_byte_size != expected_byte_size + actual_byte_size = buffer.read_position - start_position + return hash unless actual_byte_size != expected_byte_size - raise Error::BSONDecodeError, - "Expected hash to take #{expected_byte_size} bytes but it took #{actual_byte_size} bytes" + raise Error::BSONDecodeError, + "Expected hash to take #{expected_byte_size} bytes but it took #{actual_byte_size} bytes" + ensure + BSON.leave_nesting_depth + end end # Given an empty hash and a byte buffer, parse the key/value pairs from diff --git a/spec/bson/max_nesting_depth_spec.rb b/spec/bson/max_nesting_depth_spec.rb new file mode 100644 index 000000000..0ae4d8515 --- /dev/null +++ b/spec/bson/max_nesting_depth_spec.rb @@ -0,0 +1,198 @@ +# frozen_string_literal: true +# rubocop:todo all +# Copyright (C) 2026 MongoDB Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require "spec_helper" + +describe "BSON nesting depth limit" do + # On JRuby, very deep recursion may overflow the JVM thread stack before + # our depth counter fires (each Ruby method invocation inflates to multiple + # JVM frames). Either outcome — BSON::Error::BSONDecodeError or a JVM + # StackOverflowError — means the process did not crash. + # + # The Java throwable is not a subclass of Ruby's Exception, so we rescue it + # by reference. On MRI it is replaced with an unreachable sentinel. + java_stack_overflow = if defined?(JRUBY_VERSION) + eval('Java::JavaLang::StackOverflowError') + else + Class.new(Exception) + end + + matcher :raise_decode_error_or_stack_overflow do + supports_block_expectations + match do |block| + begin + block.call + false + rescue BSON::Error::BSONDecodeError, SystemStackError, java_stack_overflow + true + end + end + end + + + # Build a BSON byte string with exactly n nested documents: + # n=1 -> {} (5 bytes), n=2 -> {a: {}}, n=3 -> {a: {a: {}}}, ... + # Each wrapping level adds 8 bytes of overhead: + # int32 length(4) + type(1) + cstring "a\0"(2) + ... + terminator(1) + # Innermost empty subdoc is 5 bytes. + def deeply_nested_bson(n) + wrappers = n - 1 + total = 5 + wrappers * 8 + buf = String.new(capacity: total).force_encoding("BINARY") + remaining = total + wrappers.times do + buf << [remaining].pack("V") + buf << "\x03".b + buf << "a\x00".b + remaining -= 8 + end + buf << "\x05\x00\x00\x00\x00".b + wrappers.times { buf << "\x00".b } + buf + end + + # Build a BSON byte string with exactly n nested arrays: [], [[]], [[[]]]... + # Each wrapping level: int32(4) + type 0x04(1) + cstring "0\0"(2) + ... + 0x00(1) = 8. + def deeply_nested_array_bson(n) + wrappers = n - 1 + total = 5 + wrappers * 8 + buf = String.new(capacity: total).force_encoding("BINARY") + remaining = total + wrappers.times do + buf << [remaining].pack("V") + buf << "\x04".b + buf << "0\x00".b + remaining -= 8 + end + buf << "\x05\x00\x00\x00\x00".b + wrappers.times { buf << "\x00".b } + buf + end + + describe "Hash.from_bson" do + context "when nesting is at the maximum depth" do + let(:bytes) { deeply_nested_bson(BSON::MAX_NESTING_DEPTH) } + + it "decodes successfully" do + expect { + Hash.from_bson(BSON::ByteBuffer.new(bytes)) + }.not_to raise_error + end + end + + context "when nesting exceeds the maximum depth" do + let(:bytes) { deeply_nested_bson(BSON::MAX_NESTING_DEPTH + 1) } + + it "raises BSONDecodeError" do + expect { + Hash.from_bson(BSON::ByteBuffer.new(bytes)) + }.to raise_error(BSON::Error::BSONDecodeError, /nesting/i) + end + end + + context "when nesting is far beyond the maximum (DoS payload)" do + let(:bytes) { deeply_nested_bson(100_000) } + + it "raises a decode error or stack overflow without crashing the process" do + expect { + Hash.from_bson(BSON::ByteBuffer.new(bytes)) + }.to raise_decode_error_or_stack_overflow + end + end + end + + describe "Array.from_bson via Hash" do + context "when array nesting exceeds the maximum depth" do + # Wrap deep arrays inside a single hash so we go through the registered + # types path: { a: [ [ [ ... ] ] ] } + let(:bytes) do + inner = deeply_nested_array_bson(BSON::MAX_NESTING_DEPTH + 1) + # Wrap: int32 len + 0x04(array) + "a\0" + inner + 0x00 + len = 4 + 1 + 2 + inner.bytesize + 1 + out = String.new(capacity: len).force_encoding("BINARY") + out << [len].pack("V") + out << "\x04".b + out << "a\x00".b + out << inner + out << "\x00".b + out + end + + it "raises BSONDecodeError" do + expect { + Hash.from_bson(BSON::ByteBuffer.new(bytes)) + }.to raise_error(BSON::Error::BSONDecodeError, /nesting/i) + end + end + end + + describe "BSON::ExtJSON.parse_obj" do + # Build a Ruby structure with exactly n levels of hash nesting: + # n=1 -> {}, n=2 -> {a: {}}, n=3 -> {a: {a: {}}}, ... + def deeply_nested_hash(n) + h = {} + cur = h + (n - 1).times { cur["a"] = nxt = {}; cur = nxt } + h + end + + # Same for arrays: n=1 -> [], n=2 -> [[]], ... + def deeply_nested_array(n) + a = [] + cur = a + (n - 1).times { cur << (nxt = []); cur = nxt } + a + end + + context "when hash nesting is at the maximum depth" do + let(:input) { deeply_nested_hash(BSON::MAX_NESTING_DEPTH) } + + it "parses successfully" do + expect { BSON::ExtJSON.parse_obj(input) }.not_to raise_error + end + end + + context "when hash nesting exceeds the maximum depth" do + let(:input) { deeply_nested_hash(BSON::MAX_NESTING_DEPTH + 1) } + + it "raises BSONDecodeError" do + expect { + BSON::ExtJSON.parse_obj(input) + }.to raise_error(BSON::Error::BSONDecodeError, /nesting/i) + end + end + + context "when array nesting exceeds the maximum depth" do + let(:input) { deeply_nested_array(BSON::MAX_NESTING_DEPTH + 1) } + + it "raises BSONDecodeError" do + expect { + BSON::ExtJSON.parse_obj(input) + }.to raise_error(BSON::Error::BSONDecodeError, /nesting/i) + end + end + + context "when nesting is far beyond the maximum (DoS payload)" do + let(:input) { deeply_nested_hash(50_000) } + + it "raises a decode error or stack overflow without crashing the process" do + expect { + BSON::ExtJSON.parse_obj(input) + }.to raise_decode_error_or_stack_overflow + end + end + end +end