11#include <unordered_set>
46 apply_default_logical_type();
54 apply_default_logical_type();
70 void apply_default_logical_type() {
71 if constexpr (std::is_same_v<T, std::string>) {
106 template <
typename T>
112 cd.logical_type = logical_type;
115 if constexpr (std::is_same_v<T, std::string>) {
121 columns_.push_back(std::move(
cd));
131 template <
typename T>
138 cd.logical_type = logical_type;
139 cd.repetition = repetition;
141 if constexpr (std::is_same_v<T, std::string>) {
147 columns_.push_back(std::move(
cd));
156 template <
typename T>
169 columns_.push_back(std::move(
cd));
182 std::vector<ColumnDescriptor> columns_;
216 template <
typename...
Cols>
218 std::vector<ColumnDescriptor>
descs;
220 (
descs.push_back(std::forward<Cols>(
cols).to_descriptor()), ...);
245 if (index >= columns_.size()) {
246 throw std::out_of_range(
"Schema::column: index "
247 + std::to_string(index) +
" out of range (num_columns="
248 + std::to_string(columns_.size()) +
")");
250 return columns_[index];
262 for (
size_t i = 0;
i < columns_.size(); ++
i) {
273 if (name_ != other.name_ || columns_.size() != other.columns_.size()) {
276 for (
size_t i = 0;
i < columns_.size(); ++
i) {
277 const auto&
a = columns_[
i];
278 const auto&
b = other.columns_[
i];
292 return !(*
this == other);
297 std::vector<ColumnDescriptor> columns_;
305 std::unordered_set<std::string>
seen;
306 seen.reserve(columns_.size());
307 for (
const auto&
cd : columns_) {
308 if (!
seen.insert(
cd.name).second) {
309 throw std::invalid_argument(
310 "Schema::build: duplicate column name '" +
cd.name +
"'");
313 return Schema(std::move(name_), std::move(columns_));
Fluent builder for constructing a Schema one column at a time.
SchemaBuilder & column(std::string col_name, LogicalType logical_type=LogicalType::NONE)
Add a typed column, deducing PhysicalType from T.
SchemaBuilder(std::string name)
Construct a builder with the given root schema name.
Schema build()
Build the final Schema, consuming the builder.
SchemaBuilder & optional_column(std::string col_name, LogicalType logical_type=LogicalType::NONE)
Add an optional (nullable) column — shorthand for Repetition::OPTIONAL.
SchemaBuilder & column(std::string col_name, LogicalType logical_type, Repetition repetition)
Add a column with an explicit repetition level.
SchemaBuilder & raw_column(ColumnDescriptor cd)
Add a pre-built ColumnDescriptor directly.
Immutable schema description for a Parquet file.
const std::vector< ColumnDescriptor > & columns() const
All column descriptors (ordered).
static Schema build(std::string name, Cols &&... cols)
Build a Schema from typed Column<T> descriptors (variadic factory).
static SchemaBuilder builder(std::string name)
Create a SchemaBuilder for fluent column construction.
Schema(std::string name, std::vector< ColumnDescriptor > columns)
Construct a schema directly from a name and column list.
bool operator==(const Schema &other) const
Equality — schemas match if they have the same name and identical columns (name, physical_type,...
bool operator!=(const Schema &other) const
Inequality operator.
size_t num_columns() const
Number of columns in this schema.
Schema()=default
Default-construct an empty schema.
const std::string & name() const
Root schema name (e.g. "tick_data").
std::optional< size_t > find_column(const std::string &col_name) const
Find a column index by name.
const ColumnDescriptor & column(size_t index) const
Access a column descriptor by index.
LogicalType
Parquet logical types (from parquet.thrift LogicalType union).
@ STRING
UTF-8 string (stored as BYTE_ARRAY).
@ NONE
No logical annotation — raw physical type.
Repetition
Parquet field repetition types (nullability / cardinality).
@ OPTIONAL
Zero or one value per row (nullable).
Descriptor for a single column in a Parquet schema.
int32_t type_length
Byte length for FIXED_LEN_BYTE_ARRAY columns (-1 = N/A).
LogicalType logical_type
Semantic annotation (STRING, TIMESTAMP_NS, etc.).
Repetition repetition
Nullability / cardinality.
std::string name
Column name (unique within a schema).
PhysicalType physical_type
On-disk storage type.
Typed column descriptor for the Schema::build() variadic API.
ColumnDescriptor to_descriptor() const
Convert to a ColumnDescriptor for Schema construction.
std::string name
Column name.
Column(std::string n, LogicalType lt)
Construct a column with an explicit logical type.
Column(std::string n)
Construct a column with a name only (logical type auto-deduced for strings).
LogicalType logical_type
Optional logical annotation.
Parquet format enumerations, type traits, and statistics structs.