-
Notifications
You must be signed in to change notification settings - Fork 4.1k
GH-49884: [C++] Fix integer overflow in BufferBuilder reachable from JSON parsing #49883
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
metsw24-max
wants to merge
2
commits into
apache:main
Choose a base branch
from
metsw24-max:bufferbuilder-integer-overflow-json-path
base: main
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
+135
−15
Open
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -27,6 +27,7 @@ | |
| #include "arrow/buffer.h" | ||
| #include "arrow/status.h" | ||
| #include "arrow/util/bit_util.h" | ||
| #include "arrow/util/int_util_overflow.h" | ||
| #include "arrow/util/bitmap_generate.h" | ||
| #include "arrow/util/bitmap_ops.h" | ||
| #include "arrow/util/macros.h" | ||
|
|
@@ -35,6 +36,10 @@ | |
|
|
||
| namespace arrow { | ||
|
|
||
| static inline bool BufferBuilderCapacityTooLarge(int64_t capacity) { | ||
| return ARROW_PREDICT_FALSE(capacity > std::numeric_limits<int64_t>::max() - 63); | ||
| } | ||
|
|
||
| // ---------------------------------------------------------------------- | ||
| // Buffer builder classes | ||
|
|
||
|
|
@@ -74,6 +79,12 @@ class ARROW_EXPORT BufferBuilder { | |
| /// shrinking the builder. | ||
| /// \return Status | ||
| Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { | ||
| if (ARROW_PREDICT_FALSE(new_capacity < 0)) { | ||
| return Status::Invalid("Resize: negative capacity"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(new_capacity))) { | ||
| return Status::CapacityError("Resize: capacity overflow"); | ||
| } | ||
| if (buffer_ == NULLPTR) { | ||
| ARROW_ASSIGN_OR_RAISE(buffer_, | ||
| AllocateResizableBuffer(new_capacity, alignment_, pool_)); | ||
|
|
@@ -91,11 +102,25 @@ class ARROW_EXPORT BufferBuilder { | |
| /// \param[in] additional_bytes number of additional bytes to make space for | ||
| /// \return Status | ||
| Status Reserve(const int64_t additional_bytes) { | ||
| auto min_capacity = size_ + additional_bytes; | ||
| if (ARROW_PREDICT_FALSE(additional_bytes < 0)) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's a lot of similar checks that we're adding in many methods. Can we find a way of factoring these out, and hopefully avoid forgetting any call sites where such checks must be done? |
||
| return Status::Invalid("Reserve: negative additional_bytes"); | ||
| } | ||
| int64_t min_capacity; | ||
| if (ARROW_PREDICT_FALSE( | ||
| internal::AddWithOverflow(size_, additional_bytes, &min_capacity))) { | ||
| return Status::CapacityError("Reserve: capacity overflow"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(min_capacity))) { | ||
| return Status::CapacityError("Reserve: capacity overflow"); | ||
| } | ||
| if (min_capacity <= capacity_) { | ||
| return Status::OK(); | ||
| } | ||
| return Resize(GrowByFactor(capacity_, min_capacity), false); | ||
| int64_t requested_capacity = GrowByFactor(capacity_, min_capacity); | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(requested_capacity))) { | ||
| return Status::CapacityError("Reserve: capacity overflow"); | ||
| } | ||
| return Resize(requested_capacity, false); | ||
| } | ||
|
|
||
| /// \brief Return a capacity expanded by the desired growth factor | ||
|
|
@@ -104,15 +129,27 @@ class ARROW_EXPORT BufferBuilder { | |
| // (versus 1.5x) seems to have slightly better performance when using | ||
| // jemalloc, but significantly better performance when using the system | ||
| // allocator. See ARROW-6450 for further discussion | ||
| return std::max(new_capacity, current_capacity * 2); | ||
| int64_t doubled; | ||
| if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(current_capacity, current_capacity, | ||
| &doubled))) { | ||
| return new_capacity; | ||
| } | ||
| return std::max(new_capacity, doubled); | ||
| } | ||
|
|
||
| /// \brief Append the given data to the buffer | ||
| /// | ||
| /// The buffer is automatically expanded if necessary. | ||
| Status Append(const void* data, const int64_t length) { | ||
| if (ARROW_PREDICT_FALSE(size_ + length > capacity_)) { | ||
| ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, size_ + length), false)); | ||
| if (ARROW_PREDICT_FALSE(length < 0)) { | ||
| return Status::Invalid("Append: negative length"); | ||
| } | ||
| int64_t new_size; | ||
| if (ARROW_PREDICT_FALSE(internal::AddWithOverflow(size_, length, &new_size))) { | ||
| return Status::CapacityError("Append: size overflow"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(new_size > capacity_)) { | ||
| ARROW_RETURN_NOT_OK(Resize(GrowByFactor(capacity_, new_size), false)); | ||
| } | ||
| UnsafeAppend(data, length); | ||
| return Status::OK(); | ||
|
|
@@ -186,6 +223,9 @@ class ARROW_EXPORT BufferBuilder { | |
| /// mostly for memory allocation). | ||
| Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length, | ||
| bool shrink_to_fit = true) { | ||
| if (ARROW_PREDICT_FALSE(final_length < 0)) { | ||
| return Status::Invalid("FinishWithLength: negative final length"); | ||
| } | ||
| size_ = final_length; | ||
| return Finish(shrink_to_fit); | ||
| } | ||
|
|
@@ -250,12 +290,22 @@ class TypedBufferBuilder< | |
| } | ||
|
|
||
| Status Append(const T* values, int64_t num_elements) { | ||
| return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values), | ||
| num_elements * sizeof(T)); | ||
| if (ARROW_PREDICT_FALSE(num_elements < 0)) { | ||
| return Status::Invalid("Append: negative number of elements"); | ||
| } | ||
| int64_t num_bytes; | ||
| if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( | ||
| num_elements, static_cast<int64_t>(sizeof(T)), &num_bytes))) { | ||
| return Status::CapacityError("Append: size overflow"); | ||
| } | ||
| return bytes_builder_.Append(reinterpret_cast<const uint8_t*>(values), num_bytes); | ||
| } | ||
|
|
||
| Status Append(const int64_t num_copies, T value) { | ||
| ARROW_RETURN_NOT_OK(Reserve(num_copies + length())); | ||
| if (ARROW_PREDICT_FALSE(num_copies < 0)) { | ||
| return Status::Invalid("Append: negative number of copies"); | ||
| } | ||
| ARROW_RETURN_NOT_OK(Reserve(num_copies)); | ||
| UnsafeAppend(num_copies, value); | ||
| return Status::OK(); | ||
| } | ||
|
|
@@ -284,15 +334,45 @@ class TypedBufferBuilder< | |
| } | ||
|
|
||
| Status Resize(const int64_t new_capacity, bool shrink_to_fit = true) { | ||
| return bytes_builder_.Resize(new_capacity * sizeof(T), shrink_to_fit); | ||
| if (ARROW_PREDICT_FALSE(new_capacity < 0)) { | ||
| return Status::Invalid("Resize: negative capacity"); | ||
| } | ||
| int64_t num_bytes; | ||
| if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( | ||
| new_capacity, static_cast<int64_t>(sizeof(T)), &num_bytes))) { | ||
| return Status::CapacityError("Resize: capacity overflow"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(num_bytes))) { | ||
| return Status::CapacityError("Resize: capacity overflow"); | ||
| } | ||
| return bytes_builder_.Resize(num_bytes, shrink_to_fit); | ||
| } | ||
|
|
||
| Status Reserve(const int64_t additional_elements) { | ||
| return bytes_builder_.Reserve(additional_elements * sizeof(T)); | ||
| if (ARROW_PREDICT_FALSE(additional_elements < 0)) { | ||
| return Status::Invalid("Reserve: negative additional_elements"); | ||
| } | ||
| int64_t num_bytes; | ||
| if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( | ||
| additional_elements, static_cast<int64_t>(sizeof(T)), &num_bytes))) { | ||
| return Status::CapacityError("Reserve: size overflow"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(num_bytes))) { | ||
| return Status::CapacityError("Reserve: size overflow"); | ||
| } | ||
| return bytes_builder_.Reserve(num_bytes); | ||
| } | ||
|
|
||
| Status Advance(const int64_t length) { | ||
| return bytes_builder_.Advance(length * sizeof(T)); | ||
| if (ARROW_PREDICT_FALSE(length < 0)) { | ||
| return Status::Invalid("Advance: negative length"); | ||
| } | ||
| int64_t num_bytes; | ||
| if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( | ||
| length, static_cast<int64_t>(sizeof(T)), &num_bytes))) { | ||
| return Status::CapacityError("Advance: size overflow"); | ||
| } | ||
| return bytes_builder_.Advance(num_bytes); | ||
| } | ||
|
|
||
| void UnsafeAdvance(const int64_t length) { | ||
|
|
@@ -316,7 +396,15 @@ class TypedBufferBuilder< | |
| /// only for memory allocation). | ||
| Result<std::shared_ptr<Buffer>> FinishWithLength(int64_t final_length, | ||
| bool shrink_to_fit = true) { | ||
| return bytes_builder_.FinishWithLength(final_length * sizeof(T), shrink_to_fit); | ||
| if (ARROW_PREDICT_FALSE(final_length < 0)) { | ||
| return Status::Invalid("FinishWithLength: negative final length"); | ||
| } | ||
| int64_t num_bytes; | ||
| if (ARROW_PREDICT_FALSE(internal::MultiplyWithOverflow( | ||
| final_length, static_cast<int64_t>(sizeof(T)), &num_bytes))) { | ||
| return Status::CapacityError("FinishWithLength: final length overflow"); | ||
| } | ||
| return bytes_builder_.FinishWithLength(num_bytes, shrink_to_fit); | ||
| } | ||
|
|
||
| void Reset() { bytes_builder_.Reset(); } | ||
|
|
@@ -429,9 +517,18 @@ class TypedBufferBuilder<bool> { | |
| } | ||
|
|
||
| Status Reserve(const int64_t additional_elements) { | ||
| return Resize( | ||
| BufferBuilder::GrowByFactor(bit_length_, bit_length_ + additional_elements), | ||
| false); | ||
| if (ARROW_PREDICT_FALSE(additional_elements < 0)) { | ||
| return Status::Invalid("Reserve: negative additional_elements"); | ||
| } | ||
| int64_t min_length; | ||
| if (ARROW_PREDICT_FALSE( | ||
| internal::AddWithOverflow(bit_length_, additional_elements, &min_length))) { | ||
| return Status::CapacityError("Reserve: capacity overflow"); | ||
| } | ||
| if (ARROW_PREDICT_FALSE(BufferBuilderCapacityTooLarge(min_length))) { | ||
| return Status::CapacityError("Reserve: capacity overflow"); | ||
| } | ||
| return Resize(min_length, false); | ||
| } | ||
|
|
||
| Status Advance(const int64_t length) { | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not put these checks in
AllocateResizableBufferandPoolBuffer::Resizeinstead? That would benefit more use cases.