Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
signet_wasm.cpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3
14
15#include <emscripten.h>
16#include <emscripten/bind.h>
17#include <emscripten/val.h>
18
19#include <cstdint>
20#include <string>
21#include <vector>
22
23#include "signet/forge.hpp"
24
25namespace em = emscripten;
26using namespace signet::forge;
27
28// ---------------------------------------------------------------------------
29// MEMFS helpers
30// ---------------------------------------------------------------------------
31
34static constexpr unsigned MEMFS_MAX_FILE_SIZE = 256u * 1024u * 1024u;
35
44static bool writeFileToMemfs(const std::string& path, const em::val& arrayBuffer) {
45 auto view = em::val::global("Uint8Array").new_(arrayBuffer);
46 auto len = view["length"].as<unsigned>();
47 if (len > MEMFS_MAX_FILE_SIZE) {
48 emscripten_log(EM_LOG_ERROR, "writeFileToMemfs: file too large (%u bytes, max %u)", len, MEMFS_MAX_FILE_SIZE);
49 return false;
50 }
51 std::vector<uint8_t> buf(len);
52 auto memView = em::val(em::typed_memory_view(len, buf.data()));
53 memView.call<void>("set", view);
54 EM_ASM({
55 FS.writeFile(UTF8ToString($0), HEAPU8.subarray($1, $1 + $2));
56 }, path.c_str(), buf.data(), static_cast<int>(len));
57 return true;
58}
59
66static em::val readFileFromMemfs(const std::string& path) {
67 em::val fs = em::val::module_property("FS");
68 return fs.call<em::val>("readFile", path);
69}
70
71// ---------------------------------------------------------------------------
72// Hex and minimal JSON helpers (for encryption key input)
73// ---------------------------------------------------------------------------
74
/// @brief Decode a hexadecimal string into raw bytes.
///
/// Upper- and lower-case digits are accepted.
///
/// @param hex Hex text, two characters per output byte.
/// @return The decoded bytes, or an empty vector when @p hex has odd length or
///         contains a character that is not a hex digit.
static std::vector<uint8_t> hexToBytes(const std::string& hex) {
    // Numeric value of a single hex digit, or -1 for anything else.
    const auto digitValue = [](char ch) -> int {
        if ('0' <= ch && ch <= '9') return ch - '0';
        if ('a' <= ch && ch <= 'f') return ch - 'a' + 10;
        if ('A' <= ch && ch <= 'F') return ch - 'A' + 10;
        return -1;
    };

    const size_t digits = hex.size();
    if ((digits & 1u) != 0) return {};

    std::vector<uint8_t> out;
    out.reserve(digits / 2);
    for (size_t pos = 0; pos < digits; pos += 2) {
        const int high = digitValue(hex[pos]);
        const int low  = digitValue(hex[pos + 1]);
        if (high < 0 || low < 0) return {};
        out.push_back(static_cast<uint8_t>((high << 4) | low));
    }
    return out;
}
98
/// @brief Parse a flat JSON object of string keys and string values.
///
/// This is a deliberately minimal scanner, not a general JSON parser: it starts
/// at the first '{', reads `"key" : "value"` pairs separated by commas, and
/// stops at '}' or at the end of the text. Inside a quoted string a backslash
/// makes the following character literal (no \n / \uXXXX decoding). Pairs whose
/// key is empty are dropped; a missing or non-string value yields "".
///
/// Parsing stops at the first position where no progress can be made (for
/// example an unquoted token), so malformed input always terminates.
///
/// @param json Text expected to contain an object such as {"col":"hexkey"}.
/// @return The key/value pairs in the order they appear; empty when no '{' is found.
static std::vector<std::pair<std::string, std::string>>
parseColumnKeys(const std::string& json) {
    std::vector<std::pair<std::string, std::string>> result;
    size_t i = json.find('{');
    if (i == std::string::npos) return result;
    ++i;

    auto skipWs = [&]() { while (i < json.size() && json[i] <= ' ') ++i; };

    // Reads one double-quoted string at the cursor; returns "" (without moving
    // past the offending character) when the cursor is not on an opening quote.
    auto readStr = [&]() -> std::string {
        skipWs();
        if (i >= json.size() || json[i] != '"') return {};
        ++i;
        std::string s;
        while (i < json.size() && json[i] != '"') {
            if (json[i] == '\\' && i + 1 < json.size()) {
                ++i; // skip backslash, take next char literally
            }
            s += json[i++];
        }
        if (i < json.size()) ++i; // skip closing quote
        return s;
    };

    while (i < json.size()) {
        skipWs();
        if (i >= json.size() || json[i] == '}') break;

        const size_t entryStart = i;
        auto key = readStr();
        skipWs();
        if (i < json.size() && json[i] == ':') ++i;
        auto val = readStr();
        if (!key.empty()) result.emplace_back(std::move(key), std::move(val));
        skipWs();
        if (i < json.size() && json[i] == ',') ++i;

        // Nothing was consumed: the cursor sits on a character this scanner
        // does not understand. Bail out instead of spinning on it forever.
        if (i == entryStart) break;
    }
    return result;
}
141
142// ---------------------------------------------------------------------------
143// Version
144// ---------------------------------------------------------------------------
145
147static std::string version() {
148 return SIGNET_CREATED_BY;
149}
150
151// ---------------------------------------------------------------------------
152// SchemaBuilder wrapper — chain-style API for JS
153// ---------------------------------------------------------------------------
154
165 SchemaBuilder builder_;
166public:
169 explicit WasmSchemaBuilder(const std::string& name) : builder_(name) {}
170
177 WasmSchemaBuilder& addBool(const std::string& col) { builder_.column<bool>(col); return *this; }
178 WasmSchemaBuilder& addInt32(const std::string& col) { builder_.column<int32_t>(col); return *this; }
179 WasmSchemaBuilder& addInt64(const std::string& col) { builder_.column<int64_t>(col); return *this; }
180 WasmSchemaBuilder& addFloat(const std::string& col) { builder_.column<float>(col); return *this; }
181 WasmSchemaBuilder& addDouble(const std::string& col) { builder_.column<double>(col); return *this; }
182 WasmSchemaBuilder& addString(const std::string& col) { builder_.column<std::string>(col); return *this; }
184
186 Schema build() { return builder_.build(); }
187};
188
189// ---------------------------------------------------------------------------
190// Schema accessors (free functions bound onto the Schema class via embind)
191// ---------------------------------------------------------------------------
192
194static size_t schemaNumColumns(const Schema& s) { return s.num_columns(); }
195
197static std::string schemaColumnName(const Schema& s, size_t i) {
198 if (i >= s.num_columns()) return "";
199 return s.column(i).name;
200}
201
203static int schemaColumnPhysicalType(const Schema& s, size_t i) {
204 if (i >= s.num_columns()) return -1;
205 return static_cast<int>(s.column(i).physical_type);
206}
207
209static std::string schemaName(const Schema& s) { return s.name(); }
210
215static std::string physicalTypeName(int pt) {
216 switch (static_cast<PhysicalType>(pt)) {
217 case PhysicalType::BOOLEAN: return "BOOLEAN";
218 case PhysicalType::INT32: return "INT32";
219 case PhysicalType::INT64: return "INT64";
220 case PhysicalType::INT96: return "INT96";
221 case PhysicalType::FLOAT: return "FLOAT";
222 case PhysicalType::DOUBLE: return "DOUBLE";
223 case PhysicalType::BYTE_ARRAY: return "BYTE_ARRAY";
224 case PhysicalType::FIXED_LEN_BYTE_ARRAY: return "FIXED_LEN_BYTE_ARRAY";
225 default: return "UNKNOWN";
226 }
227}
228
233static std::string logicalTypeName(int lt) {
234 switch (static_cast<LogicalType>(lt)) {
235 case LogicalType::NONE: return "NONE";
236 case LogicalType::STRING: return "STRING";
237 case LogicalType::ENUM: return "ENUM";
238 case LogicalType::UUID: return "UUID";
239 case LogicalType::DATE: return "DATE";
240 case LogicalType::TIME_MS: return "TIME_MS";
241 case LogicalType::TIME_US: return "TIME_US";
242 case LogicalType::TIME_NS: return "TIME_NS";
243 case LogicalType::TIMESTAMP_MS: return "TIMESTAMP_MS";
244 case LogicalType::TIMESTAMP_US: return "TIMESTAMP_US";
245 case LogicalType::TIMESTAMP_NS: return "TIMESTAMP_NS";
246 case LogicalType::DECIMAL: return "DECIMAL";
247 case LogicalType::JSON: return "JSON";
248 case LogicalType::BSON: return "BSON";
249 case LogicalType::FLOAT16: return "FLOAT16";
250 case LogicalType::FLOAT32_VECTOR: return "FLOAT32_VECTOR";
251 default: return "UNKNOWN";
252 }
253}
254
256static int schemaColumnLogicalType(const Schema& s, size_t i) {
257 if (i >= s.num_columns()) return -1;
258 return static_cast<int>(s.column(i).logical_type);
259}
260
261// ---------------------------------------------------------------------------
262// WriterOptions wrapper
263// ---------------------------------------------------------------------------
264
267public:
269
271 WasmWriterOptions() = default;
272
275 void setRowGroupSize(int64_t n) { opts.row_group_size = n; }
276
278 int64_t getRowGroupSize() const { return opts.row_group_size; }
279};
280
281// ---------------------------------------------------------------------------
282// ParquetWriter wrapper
283// ---------------------------------------------------------------------------
284
291 std::unique_ptr<ParquetWriter> writer_;
292public:
294 WasmParquetWriter() = default;
295
301 bool open(const std::string& path, const Schema& schema, const WasmWriterOptions& opts) {
302 auto result = ParquetWriter::open(path, schema, opts.opts);
303 if (!result.has_value()) return false;
304 writer_ = std::make_unique<ParquetWriter>(std::move(*result));
305 return true;
306 }
307
315
317 bool writeColumnBool(size_t col, const em::val& arr) {
318 if (!writer_ || col >= writer_->num_columns()) return false;
319 auto len = arr["length"].as<unsigned>();
320 std::vector<bool> buf(len);
321 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<bool>();
322 // bool write_column needs a raw bool array
323 std::vector<uint8_t> raw(len);
324 for (unsigned i = 0; i < len; ++i) raw[i] = buf[i] ? 1 : 0;
325 return writer_->write_column<bool>(col, reinterpret_cast<const bool*>(raw.data()), len).has_value();
326 }
327
329 bool writeColumnInt32(size_t col, const em::val& arr) {
330 if (!writer_ || col >= writer_->num_columns()) return false;
331 auto len = arr["length"].as<unsigned>();
332 std::vector<int32_t> buf(len);
333 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<int32_t>();
334 return writer_->write_column<int32_t>(col, buf.data(), len).has_value();
335 }
336
338 bool writeColumnInt64(size_t col, const em::val& arr) {
339 if (!writer_ || col >= writer_->num_columns()) return false;
340 auto len = arr["length"].as<unsigned>();
341 std::vector<int64_t> buf(len);
342 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<int64_t>();
343 return writer_->write_column<int64_t>(col, buf.data(), len).has_value();
344 }
345
347 bool writeColumnFloat(size_t col, const em::val& arr) {
348 if (!writer_ || col >= writer_->num_columns()) return false;
349 auto len = arr["length"].as<unsigned>();
350 std::vector<float> buf(len);
351 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<float>();
352 return writer_->write_column<float>(col, buf.data(), len).has_value();
353 }
354
356 bool writeColumnDouble(size_t col, const em::val& arr) {
357 if (!writer_ || col >= writer_->num_columns()) return false;
358 auto len = arr["length"].as<unsigned>();
359 std::vector<double> buf(len);
360 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<double>();
361 return writer_->write_column<double>(col, buf.data(), len).has_value();
362 }
363
365 bool writeColumnString(size_t col, const em::val& arr) {
366 if (!writer_ || col >= writer_->num_columns()) return false;
367 auto len = arr["length"].as<unsigned>();
368 std::vector<std::string> buf(len);
369 for (unsigned i = 0; i < len; ++i) buf[i] = arr[i].as<std::string>();
370 return writer_->write_column<std::string>(col, buf.data(), len).has_value();
371 }
372
374
378 if (!writer_) return false;
379 return writer_->flush_row_group().has_value();
380 }
381
384 bool close() {
385 if (!writer_) return false;
386 return writer_->close().has_value();
387 }
388
390 int64_t rowsWritten() const {
391 return writer_ ? writer_->rows_written() : 0;
392 }
393
395 bool isOpen() const {
396 return writer_ && writer_->is_open();
397 }
398};
399
400// ---------------------------------------------------------------------------
401// ParquetReader wrapper
402// ---------------------------------------------------------------------------
403
410 std::unique_ptr<ParquetReader> reader_;
411public:
413 WasmParquetReader() = default;
414
418 bool open(const std::string& path) {
419 auto result = ParquetReader::open(path);
420 if (!result.has_value()) return false;
421 reader_ = std::make_unique<ParquetReader>(std::move(*result));
422 return true;
423 }
424
425#if SIGNET_ENABLE_COMMERCIAL
442 bool openEncrypted(const std::string& path,
443 const std::string& footerKeyHex,
444 const std::string& columnKeyHex,
445 const std::string& aadPrefix,
446 const std::string& columnKeysJson) {
448 cfg.footer_key = hexToBytes(footerKeyHex);
449 if (cfg.footer_key.size() != 32) return false;
450
451 if (!columnKeyHex.empty()) {
452 cfg.default_column_key = hexToBytes(columnKeyHex);
453 if (cfg.default_column_key.size() != 32) return false;
454 }
455
456 if (!aadPrefix.empty()) {
457 cfg.aad_prefix = aadPrefix;
458 }
459
460 if (!columnKeysJson.empty()) {
461 auto keys = parseColumnKeys(columnKeysJson);
462 for (auto& [name, hexKey] : keys) {
464 spec.column_name = name;
465 spec.key = hexToBytes(hexKey);
466 if (spec.key.size() != 32) return false;
467 cfg.column_keys.push_back(std::move(spec));
468 }
469 }
470
471 auto result = ParquetReader::open(path, cfg);
472
473 // Zero key material from WASM memory regardless of success/failure
474 auto zero_vec = [](std::vector<uint8_t>& v) {
475 if (!v.empty()) {
476 volatile uint8_t* p = v.data();
477 for (size_t i = 0; i < v.size(); ++i) p[i] = 0;
478 }
479 v.clear();
480 };
481 zero_vec(cfg.footer_key);
482 zero_vec(cfg.default_column_key);
483 for (auto& ck : cfg.column_keys) zero_vec(ck.key);
484
485 if (!result.has_value()) return false;
486 reader_ = std::make_unique<ParquetReader>(std::move(*result));
487 return true;
488 }
489#endif
490
492 int64_t numRows() const {
493 return reader_ ? reader_->num_rows() : 0;
494 }
495
497 int64_t numRowGroups() const {
498 return reader_ ? reader_->num_row_groups() : 0;
499 }
500
502 Schema schema() const {
503 if (!reader_) return Schema{};
504 return reader_->schema();
505 }
506
508 std::string createdBy() const {
509 return reader_ ? reader_->created_by() : "";
510 }
511
519
521 em::val readColumnBool(size_t rg, size_t col) {
522 if (!reader_) return em::val::array();
523 auto result = reader_->read_column<bool>(rg, col);
524 if (!result.has_value()) return em::val::array();
525 auto arr = em::val::array();
526 for (size_t i = 0; i < result->size(); ++i)
527 arr.call<void>("push", (*result)[i]);
528 return arr;
529 }
530
532 em::val readColumnInt32(size_t rg, size_t col) {
533 if (!reader_) return em::val::array();
534 auto result = reader_->read_column<int32_t>(rg, col);
535 if (!result.has_value()) return em::val::array();
536 auto arr = em::val::array();
537 for (size_t i = 0; i < result->size(); ++i)
538 arr.call<void>("push", (*result)[i]);
539 return arr;
540 }
541
543 em::val readColumnInt64(size_t rg, size_t col) {
544 if (!reader_) return em::val::array();
545 auto result = reader_->read_column<int64_t>(rg, col);
546 if (!result.has_value()) return em::val::array();
547 auto arr = em::val::array();
548 for (size_t i = 0; i < result->size(); ++i)
549 arr.call<void>("push", static_cast<double>((*result)[i]));
550 return arr;
551 }
552
554 em::val readColumnFloat(size_t rg, size_t col) {
555 if (!reader_) return em::val::array();
556 auto result = reader_->read_column<float>(rg, col);
557 if (!result.has_value()) return em::val::array();
558 auto arr = em::val::array();
559 for (size_t i = 0; i < result->size(); ++i)
560 arr.call<void>("push", (*result)[i]);
561 return arr;
562 }
563
565 em::val readColumnDouble(size_t rg, size_t col) {
566 if (!reader_) return em::val::array();
567 auto result = reader_->read_column<double>(rg, col);
568 if (!result.has_value()) return em::val::array();
569 auto arr = em::val::array();
570 for (size_t i = 0; i < result->size(); ++i)
571 arr.call<void>("push", (*result)[i]);
572 return arr;
573 }
574
576 em::val readColumnString(size_t rg, size_t col) {
577 if (!reader_) return em::val::array();
578 auto result = reader_->read_column<std::string>(rg, col);
579 if (!result.has_value()) return em::val::array();
580 auto arr = em::val::array();
581 for (size_t i = 0; i < result->size(); ++i)
582 arr.call<void>("push", (*result)[i]);
583 return arr;
584 }
585
587
595 em::val readColumnAsStrings(size_t rg, size_t col) {
596 if (!reader_) return em::val::array();
597 auto result = reader_->read_column_as_strings(rg, col);
598 if (!result.has_value()) return em::val::array();
599 auto arr = em::val::array();
600 for (size_t i = 0; i < result->size(); ++i)
601 arr.call<void>("push", (*result)[i]);
602 return arr;
603 }
604};
605
606// ---------------------------------------------------------------------------
607// Embind registrations
608// ---------------------------------------------------------------------------
609
// Embind registration for the module named signet_forge.
//
// Exposes to JavaScript, under the string names given below:
//   - five free functions (version, the two type-name helpers, and the two
//     filesystem helpers);
//   - Schema, whose methods are the free schema* accessor functions;
//   - SchemaBuilder, WriterOptions, ParquetWriter and ParquetReader, which are
//     the Wasm* wrapper classes registered under shorter JS-facing names.
617EMSCRIPTEN_BINDINGS(signet_forge) {
618 // Free functions
619 em::function("version", &version);
620 em::function("physicalTypeName", &physicalTypeName);
621 em::function("logicalTypeName", &logicalTypeName);
622 em::function("writeFileToMemfs", &writeFileToMemfs);
623 em::function("readFileFromMemfs", &readFileFromMemfs);
624
625 // Schema
626 em::class_<Schema>("Schema")
627 .constructor<>()
628 .function("numColumns", &schemaNumColumns)
629 .function("columnName", &schemaColumnName)
630 .function("columnPhysicalType", &schemaColumnPhysicalType)
631 .function("columnLogicalType", &schemaColumnLogicalType)
632 .function("name", &schemaName)
633 ;
634
635 // SchemaBuilder
636 em::class_<WasmSchemaBuilder>("SchemaBuilder")
637 .constructor<std::string>()
638 .function("addBool", &WasmSchemaBuilder::addBool)
639 .function("addInt32", &WasmSchemaBuilder::addInt32)
640 .function("addInt64", &WasmSchemaBuilder::addInt64)
641 .function("addFloat", &WasmSchemaBuilder::addFloat)
642 .function("addDouble", &WasmSchemaBuilder::addDouble)
643 .function("addString", &WasmSchemaBuilder::addString)
644 .function("build", &WasmSchemaBuilder::build)
645 ;
646
647 // WriterOptions
648 em::class_<WasmWriterOptions>("WriterOptions")
649 .constructor<>()
650 .function("setRowGroupSize", &WasmWriterOptions::setRowGroupSize)
651 .function("getRowGroupSize", &WasmWriterOptions::getRowGroupSize)
652 ;
653
654 // ParquetWriter
655 em::class_<WasmParquetWriter>("ParquetWriter")
656 .constructor<>()
657 .function("open", &WasmParquetWriter::open)
658 .function("writeColumnBool", &WasmParquetWriter::writeColumnBool)
659 .function("writeColumnInt32", &WasmParquetWriter::writeColumnInt32)
660 .function("writeColumnInt64", &WasmParquetWriter::writeColumnInt64)
661 .function("writeColumnFloat", &WasmParquetWriter::writeColumnFloat)
662 .function("writeColumnDouble", &WasmParquetWriter::writeColumnDouble)
663 .function("writeColumnString", &WasmParquetWriter::writeColumnString)
664 .function("flushRowGroup", &WasmParquetWriter::flushRowGroup)
665 .function("close", &WasmParquetWriter::close)
666 .function("rowsWritten", &WasmParquetWriter::rowsWritten)
667 .function("isOpen", &WasmParquetWriter::isOpen)
668 ;
669
670 // ParquetReader
671 em::class_<WasmParquetReader>("ParquetReader")
672 .constructor<>()
673 .function("open", &WasmParquetReader::open)
// openEncrypted is only registered when SIGNET_ENABLE_COMMERCIAL evaluates
// to non-zero, matching the guard around its definition.
674#if SIGNET_ENABLE_COMMERCIAL
675 .function("openEncrypted", &WasmParquetReader::openEncrypted)
676#endif
677 .function("numRows", &WasmParquetReader::numRows)
678 .function("numRowGroups", &WasmParquetReader::numRowGroups)
679 .function("schema", &WasmParquetReader::schema)
680 .function("createdBy", &WasmParquetReader::createdBy)
681 .function("readColumnBool", &WasmParquetReader::readColumnBool)
682 .function("readColumnInt32", &WasmParquetReader::readColumnInt32)
683 .function("readColumnInt64", &WasmParquetReader::readColumnInt64)
684 .function("readColumnFloat", &WasmParquetReader::readColumnFloat)
685 .function("readColumnDouble", &WasmParquetReader::readColumnDouble)
686 .function("readColumnString", &WasmParquetReader::readColumnString)
687 .function("readColumnAsStrings", &WasmParquetReader::readColumnAsStrings)
688 ;
689}
JavaScript-facing Parquet reader.
em::val readColumnInt32(size_t rg, size_t col)
Read an int32 column as a JS Array of numbers.
em::val readColumnFloat(size_t rg, size_t col)
Read a float column as a JS Array of numbers.
em::val readColumnBool(size_t rg, size_t col)
Read a boolean column as a JS Array of booleans.
bool open(const std::string &path)
Open a plaintext Parquet file from MEMFS.
em::val readColumnInt64(size_t rg, size_t col)
Read an int64 column as a JS Array of doubles (JS has no native int64).
int64_t numRowGroups() const
Return the number of row groups in the file, or 0 if not open.
em::val readColumnString(size_t rg, size_t col)
Read a string (BYTE_ARRAY) column as a JS Array of strings.
WasmParquetReader()=default
Default-construct in an unopened state.
em::val readColumnAsStrings(size_t rg, size_t col)
Read any column as a JS Array of strings (type-erased).
em::val readColumnDouble(size_t rg, size_t col)
Read a double column as a JS Array of numbers.
Schema schema() const
Return the file's schema, or an empty Schema if not open.
int64_t numRows() const
Return total row count across all row groups, or 0 if not open.
std::string createdBy() const
Return the "created by" metadata string, or "" if not open.
JavaScript-facing Parquet writer.
bool flushRowGroup()
Flush the current row group to disk and begin a new one.
bool writeColumnInt32(size_t col, const em::val &arr)
Write an int32 column from a JS array.
bool writeColumnBool(size_t col, const em::val &arr)
Write a boolean column from a JS array.
bool writeColumnString(size_t col, const em::val &arr)
Write a string (BYTE_ARRAY) column from a JS array.
WasmParquetWriter()=default
Default-construct in an unopened state.
bool writeColumnInt64(size_t col, const em::val &arr)
Write an int64 column from a JS array.
bool writeColumnDouble(size_t col, const em::val &arr)
Write a double column from a JS array.
int64_t rowsWritten() const
Return the total number of rows written so far (across all row groups).
bool close()
Finalize the Parquet file (writes footer metadata and closes the file).
bool isOpen() const
Check whether the writer is currently open and accepting data.
bool writeColumnFloat(size_t col, const em::val &arr)
Write a float column from a JS array.
bool open(const std::string &path, const Schema &schema, const WasmWriterOptions &opts)
Open a new Parquet file for writing on MEMFS.
Fluent Parquet schema builder exposed to JavaScript.
WasmSchemaBuilder & addInt64(const std::string &col)
WasmSchemaBuilder & addBool(const std::string &col)
Schema build()
Finalize and return the immutable Schema object.
WasmSchemaBuilder & addInt32(const std::string &col)
WasmSchemaBuilder & addDouble(const std::string &col)
WasmSchemaBuilder(const std::string &name)
Construct a new schema builder.
WasmSchemaBuilder & addString(const std::string &col)
WasmSchemaBuilder & addFloat(const std::string &col)
Thin wrapper around core WriterOptions for JavaScript consumption.
WriterOptions opts
Underlying writer options struct.
int64_t getRowGroupSize() const
Get the current row group size setting.
WasmWriterOptions()=default
Construct with default options.
void setRowGroupSize(int64_t n)
Set the target row group size (number of rows per group).
static expected< ParquetReader > open(const std::filesystem::path &path)
Open and parse a Parquet file, returning a ready-to-query reader.
Definition reader.hpp:189
static expected< ParquetWriter > open(const std::filesystem::path &path, const Schema &schema, const Options &options=Options{})
Open a new Parquet file for writing.
Definition writer.hpp:303
Fluent builder for constructing a Schema one column at a time.
Definition schema.hpp:92
SchemaBuilder & column(std::string col_name, LogicalType logical_type=LogicalType::NONE)
Add a typed column, deducing PhysicalType from T.
Definition schema.hpp:107
Schema build()
Build the final Schema, consuming the builder.
Definition schema.hpp:303
Immutable schema description for a Parquet file.
Definition schema.hpp:192
size_t num_columns() const
Number of columns in this schema.
Definition schema.hpp:238
const std::string & name() const
Root schema name (e.g. "tick_data").
Definition schema.hpp:235
const ColumnDescriptor & column(size_t index) const
Access a column descriptor by index.
Definition schema.hpp:244
Single-include umbrella header for the Signet Forge library.
constexpr const char * SIGNET_CREATED_BY
Default "created_by" string embedded in every Parquet footer.
Definition types.hpp:203
PhysicalType
Parquet physical (storage) types as defined in parquet.thrift.
Definition types.hpp:20
LogicalType
Parquet logical types (from parquet.thrift LogicalType union).
Definition types.hpp:41
EMSCRIPTEN_BINDINGS(signet_forge)
Emscripten embind registration block.
LogicalType logical_type
Semantic annotation (STRING, TIMESTAMP_NS, etc.).
Definition types.hpp:155
std::string name
Column name (unique within a schema).
Definition types.hpp:153
PhysicalType physical_type
On-disk storage type.
Definition types.hpp:154
Configuration options for ParquetWriter.
Definition writer.hpp:188
int64_t row_group_size
Target number of rows per row group.
Definition writer.hpp:192
Specifies the encryption key for a single Parquet column.
std::string column_name
Parquet column path (e.g. "a.b.c").
std::vector< uint8_t > key
32-byte AES-256 key (INTERNAL mode).
Top-level configuration structure that drives FileEncryptor / FileDecryptor.
std::vector< uint8_t > default_column_key
Default column key (32 bytes).
std::vector< uint8_t > footer_key
32-byte AES-256 key for encrypting the Parquet footer (FileMetaData).
std::string aad_prefix
AAD prefix – typically a file identifier or URI.
std::vector< ColumnKeySpec > column_keys
Per-column key specifications. Columns listed here get their own key.