PLAIN encoding writer for a single Parquet column.
More...
#include <column_writer.hpp>
|
| | ColumnWriter (PhysicalType type, int32_t type_length=-1) |
| | Construct a writer for the given Parquet physical type.
|
| |
| void | write_bool (bool val) |
| | Write a single boolean value.
|
| |
| void | write_int32 (int32_t val) |
| | Write a single INT32 value (4 bytes little-endian).
|
| |
| void | write_int64 (int64_t val) |
| | Write a single INT64 value (8 bytes little-endian).
|
| |
| void | write_float (float val) |
| | Write a single FLOAT value (4 bytes little-endian, IEEE 754).
|
| |
| void | write_double (double val) |
| | Write a single DOUBLE value (8 bytes little-endian, IEEE 754).
|
| |
| void | write_byte_array (const std::string &val) |
| | Write a single BYTE_ARRAY value from a std::string.
|
| |
| void | write_byte_array (const uint8_t *data, size_t len) |
| | Write a single BYTE_ARRAY value from raw bytes.
|
| |
| void | write_fixed_len_byte_array (const uint8_t *data, size_t len) |
| | Write a single FIXED_LEN_BYTE_ARRAY value from raw bytes.
|
| |
| template<typename T > |
| void | write (const T &val) |
| | Write a single value, dispatching to the correct typed write method.
|
| |
| template<typename T > |
| void | write_batch (const T *values, size_t count) |
| | Write a batch of typed values.
|
| |
| void | write_batch (const std::string *values, size_t count) |
| | Write a batch of string values (BYTE_ARRAY).
|
| |
| const std::vector< uint8_t > & | data () const |
| | Returns a const reference to the encoded byte buffer.
|
| |
| size_t | encoded_size () const |
| | Returns the total encoded data size in bytes.
|
| |
| int64_t | num_values () const |
| | Returns the number of values written so far.
|
| |
| const ColumnStatistics & | statistics () const |
| | Returns a const reference to the column statistics.
|
| |
| void | reset () |
| | Reset the writer for the next column chunk. Clears all data and statistics.
|
| |
| PhysicalType | type () const |
| | Returns the physical type this writer encodes.
|
| |
PLAIN encoding writer for a single Parquet column.
Encodes values into an internal byte buffer using the PLAIN encoding and simultaneously maintains ColumnStatistics. After all values have been written, call data() to retrieve the encoded buffer and statistics() for the accumulated column-chunk statistics.
- Note
- The writer does not enforce type safety at the column level – it is the caller's responsibility to use the correct write method for the configured PhysicalType.
- See also
- ColumnReader (the decoding counterpart)
-
ColumnStatistics
Definition at line 66 of file column_writer.hpp.
◆ ColumnWriter()
| signet::forge::ColumnWriter::ColumnWriter |
( |
PhysicalType |
type, |
|
|
int32_t |
type_length = -1 |
|
) |
| |
|
inline explicit |
Construct a writer for the given Parquet physical type.
- Parameters
-
| type | The physical type this writer will encode. |
| type_length | For FIXED_LEN_BYTE_ARRAY columns, the fixed byte length per value (from schema). Ignored for other types. |
Definition at line 72 of file column_writer.hpp.
◆ data()
| const std::vector< uint8_t > & signet::forge::ColumnWriter::data |
( |
| ) |
const |
|
inline |
Returns a const reference to the encoded byte buffer.
Definition at line 257 of file column_writer.hpp.
◆ encoded_size()
| size_t signet::forge::ColumnWriter::encoded_size |
( |
| ) |
const |
|
inline |
Returns the total encoded data size in bytes.
◆ num_values()
| int64_t signet::forge::ColumnWriter::num_values |
( |
| ) |
const |
|
inline |
Returns the number of values written so far.
◆ reset()
| void signet::forge::ColumnWriter::reset |
( |
| ) |
|
|
inline |
Reset the writer for the next column chunk. Clears all data and statistics.
Definition at line 273 of file column_writer.hpp.
◆ statistics()
Returns a const reference to the column statistics.
Definition at line 268 of file column_writer.hpp.
◆ type()
Returns the physical type this writer encodes.
◆ write()
template<typename T >
| void signet::forge::ColumnWriter::write |
( |
const T & |
val | ) |
|
|
inline |
Write a single value, dispatching to the correct typed write method.
- Template Parameters
-
| T | Supported: bool, int32_t, int64_t, float, double, std::string. |
- Parameters
-
| val | The value to write. |
Definition at line 212 of file column_writer.hpp.
◆ write_batch() [1/2]
| void signet::forge::ColumnWriter::write_batch |
( |
const std::string * |
values, |
|
|
size_t |
count |
|
) |
| |
|
inline |
Write a batch of string values (BYTE_ARRAY).
- Parameters
-
| values | Pointer to a contiguous array of count strings. |
| count | Number of strings to write. |
Definition at line 248 of file column_writer.hpp.
◆ write_batch() [2/2]
template<typename T >
| void signet::forge::ColumnWriter::write_batch |
( |
const T * |
values, |
|
|
size_t |
count |
|
) |
| |
|
inline |
Write a batch of typed values.
- Template Parameters
-
- Parameters
-
| values | Pointer to a contiguous array of count values. |
| count | Number of values to write. |
Definition at line 239 of file column_writer.hpp.
◆ write_bool()
| void signet::forge::ColumnWriter::write_bool |
( |
bool |
val | ) |
|
|
inline |
Write a single boolean value.
PLAIN encoding: bit-packed, LSB first. Bit (i % 8) of byte (i / 8) is set if value[i] is true. N values produce ceil(N/8) bytes.
- Parameters
-
| val | The boolean to encode. |
Definition at line 83 of file column_writer.hpp.
◆ write_byte_array() [1/2]
| void signet::forge::ColumnWriter::write_byte_array |
( |
const std::string & |
val | ) |
|
|
inline |
Write a single BYTE_ARRAY value from a std::string.
PLAIN encoding: 4-byte LE length prefix + raw bytes.
- Parameters
-
| val | The string whose bytes are encoded. |
Definition at line 150 of file column_writer.hpp.
◆ write_byte_array() [2/2]
| void signet::forge::ColumnWriter::write_byte_array |
( |
const uint8_t * |
data, |
|
|
size_t |
len |
|
) |
| |
|
inline |
Write a single BYTE_ARRAY value from raw bytes.
PLAIN encoding: 4-byte LE length prefix + raw bytes.
- Parameters
-
| data | Pointer to the raw byte payload. |
| len | Number of bytes in the payload. |
Definition at line 160 of file column_writer.hpp.
◆ write_double()
| void signet::forge::ColumnWriter::write_double |
( |
double |
val | ) |
|
|
inline |
Write a single DOUBLE value (8 bytes little-endian, IEEE 754).
- Parameters
-
| val | The double to encode. |
Definition at line 136 of file column_writer.hpp.
◆ write_fixed_len_byte_array()
| void signet::forge::ColumnWriter::write_fixed_len_byte_array |
( |
const uint8_t * |
data, |
|
|
size_t |
len |
|
) |
| |
|
inline |
Write a single FIXED_LEN_BYTE_ARRAY value from raw bytes.
PLAIN encoding: raw bytes only (no length prefix – length is in the schema).
- Precondition
len must exactly match the schema's type_length. Passing a different length produces a corrupt column chunk (no runtime check is performed here for performance – the caller must validate).
- Parameters
-
| data | Pointer to the raw byte payload. |
| len | Number of bytes (must match the schema's type_length). |
Definition at line 187 of file column_writer.hpp.
◆ write_float()
| void signet::forge::ColumnWriter::write_float |
( |
float |
val | ) |
|
|
inline |
Write a single FLOAT value (4 bytes little-endian, IEEE 754).
- Parameters
-
| val | The float to encode. |
Definition at line 125 of file column_writer.hpp.
◆ write_int32()
| void signet::forge::ColumnWriter::write_int32 |
( |
int32_t |
val | ) |
|
|
inline |
Write a single INT32 value (4 bytes little-endian).
- Parameters
-
| val | The 32-bit integer to encode. |
Definition at line 103 of file column_writer.hpp.
◆ write_int64()
| void signet::forge::ColumnWriter::write_int64 |
( |
int64_t |
val | ) |
|
|
inline |
Write a single INT64 value (8 bytes little-endian).
- Parameters
-
| val | The 64-bit integer to encode. |
Definition at line 114 of file column_writer.hpp.
The documentation for this class was generated from the following file: