Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 33 additions & 2 deletions cpp/src/parquet/decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1038,7 +1038,13 @@ void DictDecoderImpl<Type>::SetDict(TypedDecoder<Type>* dictionary) {

template <>
void DictDecoderImpl<BooleanType>::SetDict(TypedDecoder<BooleanType>* dictionary) {
ParquetException::NYI("Dictionary encoding is not implemented for boolean values");
dictionary_length_ = static_cast<int32_t>(dictionary->values_left());
PARQUET_THROW_NOT_OK(dictionary_->Resize(
static_cast<int64_t>(dictionary_length_) * sizeof(bool), false));
if (dictionary->Decode(dictionary_->mutable_data_as<bool>(), dictionary_length_) !=
dictionary_length_) {
throw ParquetException("Could not decode boolean dictionary values");
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it's a proper error message

}
}

template <>
Expand Down Expand Up @@ -1257,6 +1263,31 @@ void DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* bui
PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
}

// Dictionary decoder for boolean column data. Decodes PLAIN_DICTIONARY
// and RLE_DICTIONARY pages into the callers byte buffer.
class DictBooleanDecoderImpl : public DictDecoderImpl<BooleanType>,
virtual public BooleanDecoder {
public:
using BASE = DictDecoderImpl<BooleanType>;
using BASE::BASE;
using BASE::Decode;

int Decode(uint8_t* buffer, int max_values) override {
max_values = std::min(max_values, this->num_values_);
const auto* dict = dictionary_->data_as<bool>();
for (int i = 0; i < max_values; ++i) {
int32_t index;
if (ARROW_PREDICT_FALSE(!idx_decoder_.Get(&index))) {
ParquetException::EofException();
}
PARQUET_THROW_NOT_OK(IndexInBounds(index));
::arrow::bit_util::SetBitTo(buffer, i, dict[index]);
}
this->num_values_ -= max_values;
return max_values;
}
};

class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType> {
public:
using BASE = DictDecoderImpl<ByteArrayType>;
Expand Down Expand Up @@ -2459,7 +2490,7 @@ std::unique_ptr<Decoder> MakeDictDecoder(Type::type type_num,
MemoryPool* pool) {
switch (type_num) {
case Type::BOOLEAN:
ParquetException::NYI("Dictionary encoding not implemented for boolean type");
return std::make_unique<DictBooleanDecoderImpl>(descr, pool);
case Type::INT32:
return std::make_unique<DictDecoderImpl<Int32Type>>(descr, pool);
case Type::INT64:
Expand Down
27 changes: 25 additions & 2 deletions cpp/src/parquet/encoding_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -462,8 +462,31 @@ TYPED_TEST(TestDictionaryEncoding, BasicRoundTrip) {
ASSERT_NO_FATAL_FAILURE(this->Execute(2500, 2));
}

TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
ASSERT_THROW(MakeDictDecoder<BooleanType>(nullptr), ParquetException);
// Decode a dictionary encoded boolean page and ensure decoding works.
TEST(TestDictionaryEncoding, DictDecodesBoolean) {
const uint8_t dict_bytes[] = {0x02};
auto dict_plain_decoder = MakeTypedDecoder<BooleanType>(Encoding::PLAIN);
dict_plain_decoder->SetData(2, dict_bytes, 1);

auto decoder = MakeDictDecoder<BooleanType>();
decoder->SetDict(dict_plain_decoder.get());

const uint8_t indices[] = {0x01, 0x03, 0x35};
decoder->SetData(8, indices, sizeof(indices));

bool out_bool[8] = {};
ASSERT_EQ(8, decoder->Decode(out_bool, 8));
const bool expected[8] = {true, false, true, false, true, true, false, false};
for (int i = 0; i < 8; ++i) {
EXPECT_EQ(expected[i], out_bool[i]) << " at index " << i;
}

decoder->SetData(8, indices, sizeof(indices));
uint8_t out_packed = 0;
auto* bool_dec = dynamic_cast<BooleanDecoder*>(decoder.get());
ASSERT_NE(bool_dec, nullptr);
ASSERT_EQ(8, bool_dec->Decode(&out_packed, 8));
EXPECT_EQ(0x35, out_packed);
}

// ----------------------------------------------------------------------
Expand Down
Loading