PLAIN encoding writer for a single Parquet column.
More...
#include <column_writer.hpp>
|
| | ColumnWriter (PhysicalType type) |
| | Construct a writer for the given Parquet physical type.
|
| |
| void | write_bool (bool val) |
| | Write a single boolean value.
|
| |
| void | write_int32 (int32_t val) |
| | Write a single INT32 value (4 bytes little-endian).
|
| |
| void | write_int64 (int64_t val) |
| | Write a single INT64 value (8 bytes little-endian).
|
| |
| void | write_float (float val) |
| | Write a single FLOAT value (4 bytes little-endian, IEEE 754).
|
| |
| void | write_double (double val) |
| | Write a single DOUBLE value (8 bytes little-endian, IEEE 754).
|
| |
| void | write_byte_array (const std::string &val) |
| | Write a single BYTE_ARRAY value from a std::string.
|
| |
| void | write_byte_array (const uint8_t *data, size_t len) |
| | Write a single BYTE_ARRAY value from raw bytes.
|
| |
| void | write_fixed_len_byte_array (const uint8_t *data, size_t len) |
| | Write a single FIXED_LEN_BYTE_ARRAY value from raw bytes.
|
| |
| template<typename T > |
| void | write (const T &val) |
| | Write a single value, dispatching to the correct typed write method.
|
| |
| template<typename T > |
| void | write_batch (const T *values, size_t count) |
| | Write a batch of typed values.
|
| |
| void | write_batch (const std::string *values, size_t count) |
| | Write a batch of string values (BYTE_ARRAY).
|
| |
| const std::vector< uint8_t > & | data () const |
| | Returns a const reference to the encoded byte buffer.
|
| |
| size_t | encoded_size () const |
| | Returns the total encoded data size in bytes.
|
| |
| int64_t | num_values () const |
| | Returns the number of values written so far.
|
| |
| const ColumnStatistics & | statistics () const |
| | Returns a const reference to the column statistics.
|
| |
| void | reset () |
| | Reset the writer for the next column chunk. Clears all data and statistics.
|
| |
| PhysicalType | type () const |
| | Returns the physical type this writer encodes.
|
| |
PLAIN encoding writer for a single Parquet column.
Encodes values into an internal byte buffer using the PLAIN encoding and simultaneously maintains ColumnStatistics. After all values have been written, call data() to retrieve the encoded buffer and statistics() for the accumulated column-chunk statistics.
- Note
- The writer does not enforce type safety at the column level – it is the caller's responsibility to use the correct write method for the configured PhysicalType.
- See also
- ColumnReader (the decoding counterpart)
- ColumnStatistics
Definition at line 66 of file column_writer.hpp.
◆ ColumnWriter()
| signet::forge::ColumnWriter::ColumnWriter |
( |
PhysicalType |
type | ) |
|
|
inline explicit |
Construct a writer for the given Parquet physical type.
- Parameters
-
| type | The physical type this writer will encode. |
Definition at line 70 of file column_writer.hpp.
◆ data()
| const std::vector< uint8_t > & signet::forge::ColumnWriter::data |
( |
| ) |
const |
|
inline |
Returns a const reference to the encoded byte buffer.
Definition at line 247 of file column_writer.hpp.
◆ encoded_size()
| size_t signet::forge::ColumnWriter::encoded_size |
( |
| ) |
const |
|
inline |
Returns the total encoded data size in bytes.
◆ num_values()
| int64_t signet::forge::ColumnWriter::num_values |
( |
| ) |
const |
|
inline |
Returns the number of values written so far.
◆ reset()
| void signet::forge::ColumnWriter::reset |
( |
| ) |
|
|
inline |
Reset the writer for the next column chunk. Clears all data and statistics.
Definition at line 263 of file column_writer.hpp.
◆ statistics()
Returns a const reference to the column statistics.
Definition at line 258 of file column_writer.hpp.
◆ type()
Returns the physical type this writer encodes.
◆ write()
template<typename T >
| void signet::forge::ColumnWriter::write |
( |
const T & |
val | ) |
|
|
inline |
Write a single value, dispatching to the correct typed write method.
- Template Parameters
-
| T | Supported: bool, int32_t, int64_t, float, double, std::string. |
- Parameters
-
| val | The value to write. |
Definition at line 202 of file column_writer.hpp.
◆ write_batch() [1/2]
| void signet::forge::ColumnWriter::write_batch |
( |
const std::string * |
values, |
|
|
size_t |
count |
|
) |
| |
|
inline |
Write a batch of string values (BYTE_ARRAY).
- Parameters
-
| values | Pointer to a contiguous array of count strings. |
| count | Number of strings to write. |
Definition at line 238 of file column_writer.hpp.
◆ write_batch() [2/2]
template<typename T >
| void signet::forge::ColumnWriter::write_batch |
( |
const T * |
values, |
|
|
size_t |
count |
|
) |
| |
|
inline |
Write a batch of typed values.
- Template Parameters
-
| T | Element type of the array pointed to by values. |
- Parameters
-
| values | Pointer to a contiguous array of count values. |
| count | Number of values to write. |
Definition at line 229 of file column_writer.hpp.
◆ write_bool()
| void signet::forge::ColumnWriter::write_bool |
( |
bool |
val | ) |
|
|
inline |
Write a single boolean value.
PLAIN encoding: bit-packed, LSB first. For the i-th value written (counting from 0), bit (i % 8) of byte (i / 8) is set if that value is true. N values produce ceil(N/8) bytes.
- Parameters
-
| val | The boolean to encode. |
Definition at line 81 of file column_writer.hpp.
◆ write_byte_array() [1/2]
| void signet::forge::ColumnWriter::write_byte_array |
( |
const std::string & |
val | ) |
|
|
inline |
Write a single BYTE_ARRAY value from a std::string.
PLAIN encoding: 4-byte LE length prefix + raw bytes.
- Parameters
-
| val | The string whose bytes are encoded. |
Definition at line 148 of file column_writer.hpp.
◆ write_byte_array() [2/2]
| void signet::forge::ColumnWriter::write_byte_array |
( |
const uint8_t * |
data, |
|
|
size_t |
len |
|
) |
| |
|
inline |
Write a single BYTE_ARRAY value from raw bytes.
PLAIN encoding: 4-byte LE length prefix + raw bytes.
- Parameters
-
| data | Pointer to the raw byte payload. |
| len | Number of bytes in the payload. |
Definition at line 158 of file column_writer.hpp.
◆ write_double()
| void signet::forge::ColumnWriter::write_double |
( |
double |
val | ) |
|
|
inline |
Write a single DOUBLE value (8 bytes little-endian, IEEE 754).
- Parameters
-
| val | The double-precision value to encode. |
Definition at line 134 of file column_writer.hpp.
◆ write_fixed_len_byte_array()
| void signet::forge::ColumnWriter::write_fixed_len_byte_array |
( |
const uint8_t * |
data, |
|
|
size_t |
len |
|
) |
| |
|
inline |
Write a single FIXED_LEN_BYTE_ARRAY value from raw bytes.
PLAIN encoding: raw bytes only (no length prefix – length is in the schema).
- Precondition
len must exactly match the schema's type_length. Passing a different length produces a corrupt column chunk (no runtime check is performed here for performance – the caller must validate).
- Parameters
-
| data | Pointer to the raw byte payload. |
| len | Number of bytes (must match the schema's type_length). |
Definition at line 185 of file column_writer.hpp.
◆ write_float()
| void signet::forge::ColumnWriter::write_float |
( |
float |
val | ) |
|
|
inline |
Write a single FLOAT value (4 bytes little-endian, IEEE 754).
- Parameters
-
| val | The single-precision value to encode. |
Definition at line 123 of file column_writer.hpp.
◆ write_int32()
| void signet::forge::ColumnWriter::write_int32 |
( |
int32_t |
val | ) |
|
|
inline |
Write a single INT32 value (4 bytes little-endian).
- Parameters
-
| val | The 32-bit integer to encode. |
Definition at line 101 of file column_writer.hpp.
◆ write_int64()
| void signet::forge::ColumnWriter::write_int64 |
( |
int64_t |
val | ) |
|
|
inline |
Write a single INT64 value (8 bytes little-endian).
- Parameters
-
| val | The 64-bit integer to encode. |
Definition at line 112 of file column_writer.hpp.
The documentation for this class was generated from the following file: column_writer.hpp