Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
quantized_vector.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
6
7// ---------------------------------------------------------------------------
8// quantized_vector.hpp -- INT8/INT4 quantized vector storage for AI embeddings
9//
10// Header-only. Provides quantization (float32 -> int8/int4) and dequantization
11// (int8/int4 -> float32) for storing ML embedding vectors in Parquet files
12// with 75-87.5% storage savings over native FLOAT32.
13//
14// Quantization schemes:
15// SYMMETRIC_INT8 -- value = round(float / scale), range [-127, 127]
16// ASYMMETRIC_INT8 -- value = round((float - zero_point) / scale), [0, 255]
17// SYMMETRIC_INT4 -- value = round(float / scale), range [-7, 7],
18// two values per byte (high nibble first)
19//
20// SIMD acceleration: AVX2 (8 floats), SSE2 (4 floats), NEON (4 floats),
21// with scalar fallback.
22// ---------------------------------------------------------------------------
23
24#include "signet/types.hpp"
25#include "signet/error.hpp"
26
27#include <algorithm>
28#include <atomic>
29#include <cmath>
30#include <cstdint>
31#include <cstdio>
32#include <cstring>
33#include <limits>
34#include <string>
35#include <vector>
36
37// ---------------------------------------------------------------------------
38// SIMD platform detection
39// ---------------------------------------------------------------------------
40#if defined(__AVX2__)
41 #include <immintrin.h>
42 #define SIGNET_QUANT_AVX2 1
43#elif defined(__SSE4_2__) || defined(__SSE4_1__) || defined(__SSE2__)
44 #include <immintrin.h>
45 #define SIGNET_QUANT_SSE 1
46#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
47 #include <arm_neon.h>
48 #define SIGNET_QUANT_NEON 1
49#endif
50
51namespace signet::forge {
52
53// ---------------------------------------------------------------------------
54// QuantizationScheme -- identifies the quantization method
55// ---------------------------------------------------------------------------
56
/// Identifies the quantization method used to encode a vector.
/// Backed by int32_t so the value round-trips through serialized params.
enum class QuantizationScheme : int32_t {
    SYMMETRIC_INT8  = 0,  ///< q = round(f / scale), range [-127, 127]
    ASYMMETRIC_INT8 = 1,  ///< q = round((f - zero_point) / scale), range [0, 255]
    SYMMETRIC_INT4  = 2,  ///< q = round(f / scale), range [-7, 7], two values per byte
};
65
66// ---------------------------------------------------------------------------
67// QuantizationParams -- parameters that fully describe a quantization mapping
68// ---------------------------------------------------------------------------
69
78 float scale = 0.0f;
79 float zero_point = 0.0f;
80 uint32_t dimension = 0;
81
92 [[nodiscard]] static inline QuantizationParams compute(
93 const float* data,
94 size_t num_vectors,
95 uint32_t dim,
97
103 [[nodiscard]] inline std::string serialize() const;
104
109 [[nodiscard]] static inline expected<QuantizationParams> deserialize(const std::string& s);
110
117 [[nodiscard]] inline size_t bytes_per_vector() const;
118};
119
120// ---------------------------------------------------------------------------
121// Quantizer -- quantize float32 vectors to int8 or int4
122// ---------------------------------------------------------------------------
123
132public:
136 : params_(std::move(params))
137 , inv_scale_((std::isfinite(params_.scale) && params_.scale > 0.0f)
138 ? 1.0f / params_.scale
139 : 1.0f) {
140 if (!std::isfinite(params_.scale) || params_.scale <= 0.0f) {
141 params_.scale = 1.0f;
142 params_.zero_point = 0.0f;
143 }
144 }
145
153 inline void quantize(const float* input, uint8_t* output) const;
154
160 [[nodiscard]] inline std::vector<uint8_t> quantize_batch(
161 const float* input, size_t num_vectors) const;
162
164 [[nodiscard]] const QuantizationParams& params() const { return params_; }
165
166private:
167 QuantizationParams params_;
168 float inv_scale_; // precomputed 1/scale for multiply-instead-of-divide
169
170 // -- Scalar helpers --------------------------------------------------
171 inline void quantize_symmetric_int8_scalar(const float* in, uint8_t* out, uint32_t dim) const;
172 inline void quantize_asymmetric_int8_scalar(const float* in, uint8_t* out, uint32_t dim) const;
173 inline void quantize_symmetric_int4_scalar(const float* in, uint8_t* out, uint32_t dim) const;
174
175 // -- SIMD helpers (defined only when platform macros are set) ---------
176#if defined(SIGNET_QUANT_AVX2)
177 inline void quantize_symmetric_int8_avx2(const float* in, uint8_t* out, uint32_t dim) const;
178 inline void quantize_asymmetric_int8_avx2(const float* in, uint8_t* out, uint32_t dim) const;
179#elif defined(SIGNET_QUANT_SSE)
180 inline void quantize_symmetric_int8_sse(const float* in, uint8_t* out, uint32_t dim) const;
181 inline void quantize_asymmetric_int8_sse(const float* in, uint8_t* out, uint32_t dim) const;
182#elif defined(SIGNET_QUANT_NEON)
183 inline void quantize_symmetric_int8_neon(const float* in, uint8_t* out, uint32_t dim) const;
184 inline void quantize_asymmetric_int8_neon(const float* in, uint8_t* out, uint32_t dim) const;
185#endif
186};
187
188// ---------------------------------------------------------------------------
189// Dequantizer -- dequantize int8/int4 back to float32
190// ---------------------------------------------------------------------------
191
199public:
202 : params_(std::move(params)) {}
203
208 inline void dequantize(const uint8_t* input, float* output) const;
209
215 [[nodiscard]] inline std::vector<std::vector<float>> dequantize_batch(
216 const uint8_t* input, size_t num_vectors) const;
217
223 [[nodiscard]] inline std::vector<float> dequantize_flat(
224 const uint8_t* input, size_t num_vectors) const;
225
227 [[nodiscard]] const QuantizationParams& params() const { return params_; }
228
231 [[nodiscard]] uint64_t anomaly_count() const { return anomaly_count_.load(std::memory_order_relaxed); }
232
233private:
234 QuantizationParams params_;
235 mutable std::atomic<uint64_t> anomaly_count_{0};
236
237 // -- Scalar helpers --------------------------------------------------
238 inline void dequantize_symmetric_int8_scalar(const uint8_t* in, float* out, uint32_t dim) const;
239 inline void dequantize_asymmetric_int8_scalar(const uint8_t* in, float* out, uint32_t dim) const;
240 inline void dequantize_symmetric_int4_scalar(const uint8_t* in, float* out, uint32_t dim) const;
241
242 // -- SIMD helpers (defined only when platform macros are set) ---------
243#if defined(SIGNET_QUANT_AVX2)
244 inline void dequantize_symmetric_int8_avx2(const uint8_t* in, float* out, uint32_t dim) const;
245 inline void dequantize_asymmetric_int8_avx2(const uint8_t* in, float* out, uint32_t dim) const;
246#elif defined(SIGNET_QUANT_SSE)
247 inline void dequantize_symmetric_int8_sse(const uint8_t* in, float* out, uint32_t dim) const;
248 inline void dequantize_asymmetric_int8_sse(const uint8_t* in, float* out, uint32_t dim) const;
249#elif defined(SIGNET_QUANT_NEON)
250 inline void dequantize_symmetric_int8_neon(const uint8_t* in, float* out, uint32_t dim) const;
251 inline void dequantize_asymmetric_int8_neon(const uint8_t* in, float* out, uint32_t dim) const;
252#endif
253};
254
255// ---------------------------------------------------------------------------
256// QuantizedVectorWriter -- accumulates float32 vectors, quantizes, and
257// produces FIXED_LEN_BYTE_ARRAY page data for Parquet column chunks.
258// ---------------------------------------------------------------------------
259
265public:
268 : quantizer_(params), num_vectors_(0) {}
269
272 inline void add(const float* data);
273
276 inline void add_raw(const uint8_t* data);
277
281 inline void add_batch(const float* data, size_t num_vectors);
282
288 [[nodiscard]] inline std::vector<uint8_t> flush();
289
291 [[nodiscard]] size_t num_vectors() const { return num_vectors_; }
292
301 [[nodiscard]] static inline ColumnDescriptor make_descriptor(
302 const std::string& name,
304
306 [[nodiscard]] const QuantizationParams& params() const {
307 return quantizer_.params();
308 }
309
310private:
311 Quantizer quantizer_;
312 std::vector<uint8_t> buf_;
313 size_t num_vectors_;
314};
315
316// ---------------------------------------------------------------------------
317// QuantizedVectorReader -- reads quantized page data and dequantizes to
318// float32 on demand.
319// ---------------------------------------------------------------------------
320
326public:
329 : params_(std::move(params)), dequantizer_(params_) {}
330
335 [[nodiscard]] inline std::vector<std::vector<float>> read_page(
336 const uint8_t* data, size_t data_size);
337
343 [[nodiscard]] inline std::vector<float> read_vector(
344 const uint8_t* page_data, size_t page_size, size_t index);
345
347 struct RawResult {
348 const uint8_t* data;
349 size_t num_vectors;
350 };
351
357 [[nodiscard]] inline expected<RawResult> read_raw(
358 const uint8_t* data, size_t data_size);
359
360private:
361 QuantizationParams params_;
362 Dequantizer dequantizer_;
363};
364
365
366// ===========================================================================
367//
368// IMPLEMENTATION -- QuantizationParams
369//
370// ===========================================================================
371
373 const float* data,
374 size_t num_vectors,
375 uint32_t dim,
376 QuantizationScheme scheme)
377{
379 p.scheme = scheme;
380 p.dimension = dim;
381
382 if (num_vectors == 0 || dim == 0) {
383 p.scale = 1.0f;
384 p.zero_point = 0.0f;
385 return p;
386 }
387
388 const size_t total = num_vectors * static_cast<size_t>(dim);
389
390 // Find min and max across all finite values (skip NaN/Infinity)
391 // Parenthesized to prevent MSVC min/max macro expansion
392 float vmin = (std::numeric_limits<float>::max)();
393 float vmax = (std::numeric_limits<float>::lowest)();
394 for (size_t i = 0; i < total; ++i) {
395 const float v = data[i];
396 if (!std::isfinite(v)) continue;
397 if (v < vmin) vmin = v;
398 if (v > vmax) vmax = v;
399 }
400 // If no finite values found, fall back to safe defaults
401 if (vmin > vmax) {
402 p.scale = 1.0f;
403 p.zero_point = 0.0f;
404 return p;
405 }
406
407 switch (scheme) {
409 // scale = max(|min|, |max|) / 127
410 const float abs_max = (std::max)(std::fabs(vmin), std::fabs(vmax));
411 p.scale = (abs_max > 0.0f) ? (abs_max / 127.0f) : 1.0f;
412 p.zero_point = 0.0f;
413 break;
414 }
416 // scale = (max - min) / 255, zero_point = min
417 const float range = vmax - vmin;
418 p.scale = (range > 0.0f) ? (range / 255.0f) : 1.0f;
419 p.zero_point = vmin;
420 break;
421 }
423 // scale = max(|min|, |max|) / 7
424 const float abs_max = (std::max)(std::fabs(vmin), std::fabs(vmax));
425 p.scale = (abs_max > 0.0f) ? (abs_max / 7.0f) : 1.0f;
426 p.zero_point = 0.0f;
427 break;
428 }
429 }
430
431 // MiFID II Annex I Field 6: verify that quantization scale preserves
432 // required price precision for regulated instruments.
433 // The smallest representable delta equals `scale`. If this exceeds the
434 // instrument tick size, quantized prices may violate precision requirements.
435 // Users embedding regulated price data should confirm scale < tick_size.
436
437 return p;
438}
439
440inline std::string QuantizationParams::serialize() const {
441 // Format: scheme=N;scale=F;zero_point=F;dimension=N
442 std::string s;
443 s += "scheme=";
444 s += std::to_string(static_cast<int32_t>(scheme));
445 s += ";scale=";
446 { char buf[32]; std::snprintf(buf, sizeof(buf), "%.9g", static_cast<double>(scale)); s += buf; }
447 s += ";zero_point=";
448 { char buf[32]; std::snprintf(buf, sizeof(buf), "%.9g", static_cast<double>(zero_point)); s += buf; }
449 s += ";dimension=";
450 s += std::to_string(dimension);
451 return s;
452}
453
456
457 // Parse semicolon-delimited key=value pairs
458 bool got_scheme = false, got_scale = false, got_dim = false;
459
460 size_t pos = 0;
461 while (pos < s.size()) {
462 // Find '='
463 size_t eq = s.find('=', pos);
464 if (eq == std::string::npos) break;
465
466 std::string key = s.substr(pos, eq - pos);
467
468 // Find ';' or end
469 size_t semi = s.find(';', eq + 1);
470 std::string val;
471 if (semi == std::string::npos) {
472 val = s.substr(eq + 1);
473 pos = s.size();
474 } else {
475 val = s.substr(eq + 1, semi - eq - 1);
476 pos = semi + 1;
477 }
478
479 if (key == "scheme") {
480 try {
481 int32_t v = std::stoi(val);
482 if (v < 0 || v > 2) {
484 "quantization params: invalid scheme value"};
485 }
486 p.scheme = static_cast<QuantizationScheme>(v);
487 got_scheme = true;
488 } catch (...) {
490 "quantization params: malformed scheme"};
491 }
492 } else if (key == "scale") {
493 try {
494 p.scale = std::stof(val);
495 got_scale = true;
496 } catch (...) {
498 "quantization params: malformed scale"};
499 }
500 } else if (key == "zero_point") {
501 try {
502 p.zero_point = std::stof(val);
503 } catch (...) {
505 "quantization params: malformed zero_point"};
506 }
507 } else if (key == "dimension") {
508 try {
509 int v = std::stoi(val);
510 if (v <= 0) {
512 "quantization params: dimension must be positive"};
513 }
514 p.dimension = static_cast<uint32_t>(v);
515 got_dim = true;
516 } catch (...) {
518 "quantization params: malformed dimension"};
519 }
520 }
521 // Unknown keys are silently ignored for forward compatibility
522 }
523
524 if (!got_scheme || !got_scale || !got_dim) {
526 "quantization params: missing required field(s)"};
527 }
528 if (!std::isfinite(p.scale) || p.scale <= 0.0f) {
530 "quantization params: scale must be finite and positive"};
531 }
532 if (!std::isfinite(p.zero_point)) {
534 "quantization params: zero_point must be finite"};
535 }
536
537 return p;
538}
539
541 switch (scheme) {
544 return static_cast<size_t>(dimension);
546 return (static_cast<size_t>(dimension) + 1) / 2;
547 }
548 return static_cast<size_t>(dimension); // unreachable, silence warnings
549}
550
551
552// ===========================================================================
553//
554// IMPLEMENTATION -- Quantizer (scalar paths)
555//
556// ===========================================================================
557
558inline void Quantizer::quantize_symmetric_int8_scalar(
559 const float* in, uint8_t* out, uint32_t dim) const
560{
561 for (uint32_t i = 0; i < dim; ++i) {
562 float scaled = std::nearbyintf(in[i] * inv_scale_);
563 int32_t q = static_cast<int32_t>(scaled);
564 q = std::clamp(q, -127, 127);
565 // Store as int8_t reinterpreted to uint8_t
566 out[i] = static_cast<uint8_t>(static_cast<int8_t>(q));
567 }
568}
569
570inline void Quantizer::quantize_asymmetric_int8_scalar(
571 const float* in, uint8_t* out, uint32_t dim) const
572{
573 const float zp = params_.zero_point;
574 for (uint32_t i = 0; i < dim; ++i) {
575 float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
576 int32_t q = static_cast<int32_t>(scaled);
577 q = std::clamp(q, 0, 255);
578 out[i] = static_cast<uint8_t>(q);
579 }
580}
581
582inline void Quantizer::quantize_symmetric_int4_scalar(
583 const float* in, uint8_t* out, uint32_t dim) const
584{
585 const size_t packed_len = (static_cast<size_t>(dim) + 1) / 2;
586 std::memset(out, 0, packed_len);
587
588 for (uint32_t i = 0; i < dim; ++i) {
589 float scaled = std::nearbyintf(in[i] * inv_scale_);
590 int32_t q = static_cast<int32_t>(scaled);
591 q = std::clamp(q, -7, 7);
592
593 // Encode as 4-bit two's complement: value & 0xF
594 uint8_t nibble = static_cast<uint8_t>(q & 0x0F);
595 uint32_t byte_idx = i / 2;
596
597 if ((i & 1) == 0) {
598 // Even index -> high nibble
599 out[byte_idx] |= static_cast<uint8_t>(nibble << 4);
600 } else {
601 // Odd index -> low nibble
602 out[byte_idx] |= nibble;
603 }
604 }
605}
606
607
608// ===========================================================================
609//
610// IMPLEMENTATION -- Quantizer (SIMD paths)
611//
612// ===========================================================================
613
#if defined(SIGNET_QUANT_AVX2)

/// AVX2 SYMMETRIC_INT8: processes 8 floats per iteration, scalar tail.
inline void Quantizer::quantize_symmetric_int8_avx2(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const __m256 vscale = _mm256_set1_ps(inv_scale_);
    const __m256 vmin = _mm256_set1_ps(-127.0f);
    const __m256 vmax = _mm256_set1_ps(127.0f);

    uint32_t i = 0;
    for (; i + 8 <= dim; i += 8) {
        __m256 vf = _mm256_loadu_ps(in + i);
        vf = _mm256_mul_ps(vf, vscale);
        vf = _mm256_round_ps(vf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        vf = _mm256_max_ps(vf, vmin);
        vf = _mm256_min_ps(vf, vmax);

        // Convert to int32
        __m256i vi32 = _mm256_cvtps_epi32(vf);

        // Pack int32 -> int16 -> int8 (with saturation)
        // Extract 128-bit halves
        __m128i lo = _mm256_castsi256_si128(vi32);
        __m128i hi = _mm256_extracti128_si256(vi32, 1);
        __m128i vi16 = _mm_packs_epi32(lo, hi);
        __m128i vi8 = _mm_packs_epi16(vi16, vi16);

        // Store lower 8 bytes
        // Note: _mm_packs_epi16 with itself duplicates; we only need lower 8
        uint64_t packed;
        std::memcpy(&packed, &vi8, 8);
        std::memcpy(out + i, &packed, 8);
    }

    // Scalar tail
    for (; i < dim; ++i) {
        float scaled = std::nearbyintf(in[i] * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, -127, 127);
        out[i] = static_cast<uint8_t>(static_cast<int8_t>(q));
    }
}

/// AVX2 ASYMMETRIC_INT8: subtract zero_point, scale, round, clamp [0,255].
inline void Quantizer::quantize_asymmetric_int8_avx2(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const __m256 vscale = _mm256_set1_ps(inv_scale_);
    const __m256 vzp = _mm256_set1_ps(params_.zero_point);
    const __m256 vmin = _mm256_set1_ps(0.0f);
    const __m256 vmax = _mm256_set1_ps(255.0f);

    uint32_t i = 0;
    for (; i + 8 <= dim; i += 8) {
        __m256 vf = _mm256_loadu_ps(in + i);
        vf = _mm256_sub_ps(vf, vzp);
        vf = _mm256_mul_ps(vf, vscale);
        vf = _mm256_round_ps(vf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
        vf = _mm256_max_ps(vf, vmin);
        vf = _mm256_min_ps(vf, vmax);

        __m256i vi32 = _mm256_cvtps_epi32(vf);

        // Pack int32 -> uint16 -> uint8 (unsigned saturation)
        __m128i lo = _mm256_castsi256_si128(vi32);
        __m128i hi = _mm256_extracti128_si256(vi32, 1);
        __m128i vi16 = _mm_packus_epi32(lo, hi);
        __m128i vi8 = _mm_packus_epi16(vi16, vi16);

        uint64_t packed;
        std::memcpy(&packed, &vi8, 8);
        std::memcpy(out + i, &packed, 8);
    }

    // Scalar tail
    const float zp = params_.zero_point;
    for (; i < dim; ++i) {
        float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, 0, 255);
        out[i] = static_cast<uint8_t>(q);
    }
}

#elif defined(SIGNET_QUANT_SSE)

/// SSE SYMMETRIC_INT8: processes 4 floats per iteration, scalar tail.
inline void Quantizer::quantize_symmetric_int8_sse(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const __m128 vscale = _mm_set1_ps(inv_scale_);
    const __m128 vmin = _mm_set1_ps(-127.0f);
    const __m128 vmax = _mm_set1_ps(127.0f);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        __m128 vf = _mm_loadu_ps(in + i);
        vf = _mm_mul_ps(vf, vscale);
        // Clamp before cvt; cvtps_epi32 rounds to nearest (default FP mode)
        vf = _mm_max_ps(vf, vmin);
        vf = _mm_min_ps(vf, vmax);

        __m128i vi32 = _mm_cvtps_epi32(vf);
        __m128i vi16 = _mm_packs_epi32(vi32, vi32);
        __m128i vi8 = _mm_packs_epi16(vi16, vi16);

        uint32_t packed;
        std::memcpy(&packed, &vi8, 4);
        std::memcpy(out + i, &packed, 4);
    }

    for (; i < dim; ++i) {
        float scaled = std::nearbyintf(in[i] * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, -127, 127);
        out[i] = static_cast<uint8_t>(static_cast<int8_t>(q));
    }
}

/// SSE ASYMMETRIC_INT8: subtract zero_point, scale, clamp [0,255].
inline void Quantizer::quantize_asymmetric_int8_sse(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const __m128 vscale = _mm_set1_ps(inv_scale_);
    const __m128 vzp = _mm_set1_ps(params_.zero_point);
    const __m128 vmin = _mm_set1_ps(0.0f);
    const __m128 vmax = _mm_set1_ps(255.0f);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        __m128 vf = _mm_loadu_ps(in + i);
        vf = _mm_sub_ps(vf, vzp);
        vf = _mm_mul_ps(vf, vscale);
        // Clamp before cvt; cvtps_epi32 rounds to nearest (default FP mode)
        vf = _mm_max_ps(vf, vmin);
        vf = _mm_min_ps(vf, vmax);

        __m128i vi32 = _mm_cvtps_epi32(vf);

        // Pack int32 -> uint8 via saturation (SSE2-compatible path)
        // packs_epi32 -> packs_epi16 gives signed int16 with saturation
        // Then packus_epi16 gives unsigned uint8 with saturation
        __m128i vi16 = _mm_packs_epi32(vi32, vi32);
        __m128i vi8 = _mm_packus_epi16(vi16, vi16);

        uint32_t packed;
        std::memcpy(&packed, &vi8, 4);
        std::memcpy(out + i, &packed, 4);
    }

    const float zp = params_.zero_point;
    for (; i < dim; ++i) {
        float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, 0, 255);
        out[i] = static_cast<uint8_t>(q);
    }
}

#elif defined(SIGNET_QUANT_NEON)

/// NEON SYMMETRIC_INT8: processes 4 floats per iteration, scalar tail.
inline void Quantizer::quantize_symmetric_int8_neon(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const float32x4_t vscale = vdupq_n_f32(inv_scale_);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        float32x4_t vf = vld1q_f32(in + i);
        vf = vmulq_f32(vf, vscale);
        vf = vrndnq_f32(vf); // round to nearest

        // Clamp to [-127, 127]
        vf = vmaxq_f32(vf, vdupq_n_f32(-127.0f));
        vf = vminq_f32(vf, vdupq_n_f32(127.0f));

        int32x4_t vi32 = vcvtq_s32_f32(vf);
        int16x4_t vi16 = vmovn_s32(vi32);
        int8x8_t vi8 = vmovn_s16(vcombine_s16(vi16, vi16));

        // Store lower 4 bytes
        vst1_lane_u32(reinterpret_cast<uint32_t*>(out + i),
                      vreinterpret_u32_s8(vi8), 0);
    }

    for (; i < dim; ++i) {
        float scaled = std::nearbyintf(in[i] * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, -127, 127);
        out[i] = static_cast<uint8_t>(static_cast<int8_t>(q));
    }
}

/// NEON ASYMMETRIC_INT8: subtract zero_point, scale, clamp [0,255].
inline void Quantizer::quantize_asymmetric_int8_neon(
    const float* in, uint8_t* out, uint32_t dim) const
{
    const float32x4_t vscale = vdupq_n_f32(inv_scale_);
    const float32x4_t vzp = vdupq_n_f32(params_.zero_point);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        float32x4_t vf = vld1q_f32(in + i);
        vf = vsubq_f32(vf, vzp);
        vf = vmulq_f32(vf, vscale);
        vf = vrndnq_f32(vf);

        // Clamp to [0, 255]
        vf = vmaxq_f32(vf, vdupq_n_f32(0.0f));
        vf = vminq_f32(vf, vdupq_n_f32(255.0f));

        uint32x4_t vu32 = vcvtq_u32_f32(vf);
        uint16x4_t vu16 = vmovn_u32(vu32);
        uint8x8_t vu8 = vmovn_u16(vcombine_u16(vu16, vu16));

        vst1_lane_u32(reinterpret_cast<uint32_t*>(out + i),
                      vreinterpret_u32_u8(vu8), 0);
    }

    const float zp = params_.zero_point;
    for (; i < dim; ++i) {
        float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
        int32_t q = static_cast<int32_t>(scaled);
        q = std::clamp(q, 0, 255);
        out[i] = static_cast<uint8_t>(q);
    }
}

#endif // SIMD quantize helpers
839
840
841// ---------------------------------------------------------------------------
842// Quantizer::quantize -- dispatch to best available path
843// ---------------------------------------------------------------------------
844inline void Quantizer::quantize(const float* input, uint8_t* output) const {
845 const uint32_t dim = params_.dimension;
846
847 switch (params_.scheme) {
849#if defined(SIGNET_QUANT_AVX2)
850 quantize_symmetric_int8_avx2(input, output, dim);
851#elif defined(SIGNET_QUANT_SSE)
852 quantize_symmetric_int8_sse(input, output, dim);
853#elif defined(SIGNET_QUANT_NEON)
854 quantize_symmetric_int8_neon(input, output, dim);
855#else
856 quantize_symmetric_int8_scalar(input, output, dim);
857#endif
858 break;
859
861#if defined(SIGNET_QUANT_AVX2)
862 quantize_asymmetric_int8_avx2(input, output, dim);
863#elif defined(SIGNET_QUANT_SSE)
864 quantize_asymmetric_int8_sse(input, output, dim);
865#elif defined(SIGNET_QUANT_NEON)
866 quantize_asymmetric_int8_neon(input, output, dim);
867#else
868 quantize_asymmetric_int8_scalar(input, output, dim);
869#endif
870 break;
871
873 // INT4 is always scalar (nibble packing makes SIMD less beneficial)
874 quantize_symmetric_int4_scalar(input, output, dim);
875 break;
876 }
877}
878
879inline std::vector<uint8_t> Quantizer::quantize_batch(
880 const float* input, size_t num_vectors) const
881{
882 const size_t bpv = params_.bytes_per_vector();
883 std::vector<uint8_t> result(num_vectors * bpv);
884
885 for (size_t v = 0; v < num_vectors; ++v) {
886 quantize(input + v * params_.dimension,
887 result.data() + v * bpv);
888 }
889
890 return result;
891}
892
893
894// ===========================================================================
895//
896// IMPLEMENTATION -- Dequantizer (scalar paths)
897//
898// ===========================================================================
899
900inline void Dequantizer::dequantize_symmetric_int8_scalar(
901 const uint8_t* in, float* out, uint32_t dim) const
902{
903 const float s = params_.scale;
904 const float range_max = 127.0f * s;
905 const float range_min = -127.0f * s;
906 for (uint32_t i = 0; i < dim; ++i) {
907 int8_t q = static_cast<int8_t>(in[i]);
908 out[i] = static_cast<float>(q) * s;
909 // Clamp to quantization range bounds (EU AI Act Art.12 anomaly tracking)
910 if (out[i] < range_min || out[i] > range_max) {
911 out[i] = std::clamp(out[i], range_min, range_max);
912 anomaly_count_.fetch_add(1, std::memory_order_relaxed);
913 }
914 }
915}
916
917inline void Dequantizer::dequantize_asymmetric_int8_scalar(
918 const uint8_t* in, float* out, uint32_t dim) const
919{
920 const float s = params_.scale;
921 const float zp = params_.zero_point;
922 const float range_min = zp; // 0 * s + zp
923 const float range_max = 255.0f * s + zp; // 255 * s + zp
924 for (uint32_t i = 0; i < dim; ++i) {
925 out[i] = static_cast<float>(in[i]) * s + zp;
926 // Clamp to quantization range bounds (EU AI Act Art.12 anomaly tracking)
927 if (out[i] < range_min || out[i] > range_max) {
928 out[i] = std::clamp(out[i], range_min, range_max);
929 anomaly_count_.fetch_add(1, std::memory_order_relaxed);
930 }
931 }
932}
933
934inline void Dequantizer::dequantize_symmetric_int4_scalar(
935 const uint8_t* in, float* out, uint32_t dim) const
936{
937 const float s = params_.scale;
938 const float range_max = 7.0f * s;
939 const float range_min = -7.0f * s;
940 for (uint32_t i = 0; i < dim; ++i) {
941 uint32_t byte_idx = i / 2;
942 uint8_t nibble;
943 if ((i & 1) == 0) {
944 // Even index -> high nibble
945 nibble = (in[byte_idx] >> 4) & 0x0F;
946 } else {
947 // Odd index -> low nibble
948 nibble = in[byte_idx] & 0x0F;
949 }
950
951 // Sign-extend 4-bit two's complement to int8 (CWE-194).
952 // Portable, branchless, defined behavior per C++20 standard:
953 // XOR with 0x08 flips the sign bit, subtract 0x08 restores offset.
954 int8_t signed_val = static_cast<int8_t>((nibble ^ 0x08) - 0x08);
955
956 out[i] = static_cast<float>(signed_val) * s;
957 // Clamp to quantization range bounds (EU AI Act Art.12 anomaly tracking)
958 if (out[i] < range_min || out[i] > range_max) {
959 out[i] = std::clamp(out[i], range_min, range_max);
960 anomaly_count_.fetch_add(1, std::memory_order_relaxed);
961 }
962 }
963}
964
965
966// ===========================================================================
967//
968// IMPLEMENTATION -- Dequantizer (SIMD paths)
969//
970// ===========================================================================
971
972#if defined(SIGNET_QUANT_AVX2)
973
974inline void Dequantizer::dequantize_symmetric_int8_avx2(
975 const uint8_t* in, float* out, uint32_t dim) const
976{
977 const __m256 vscale = _mm256_set1_ps(params_.scale);
978
979 uint32_t i = 0;
980 for (; i + 8 <= dim; i += 8) {
981 // Load 8 int8 values, sign-extend to int32, convert to float
982 // Use a 64-bit load into low half of __m128i
983 __m128i raw = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(in + i));
984
985 // Sign-extend int8 -> int16 (lower 8 bytes)
986 __m128i vi16 = _mm_cvtepi8_epi16(raw);
987
988 // Sign-extend int16 -> int32 (lower 4 and upper 4 separately)
989 __m128i lo32 = _mm_cvtepi16_epi32(vi16);
990 __m128i hi16 = _mm_unpackhi_epi64(vi16, vi16);
991 __m128i hi32 = _mm_cvtepi16_epi32(hi16);
992
993 // Combine into 256-bit
994 __m256i vi32 = _mm256_set_m128i(hi32, lo32);
995
996 // Convert int32 -> float and multiply by scale
997 __m256 vf = _mm256_cvtepi32_ps(vi32);
998 vf = _mm256_mul_ps(vf, vscale);
999
1000 _mm256_storeu_ps(out + i, vf);
1001 }
1002
1003 // Scalar tail
1004 const float s = params_.scale;
1005 for (; i < dim; ++i) {
1006 int8_t q = static_cast<int8_t>(in[i]);
1007 out[i] = static_cast<float>(q) * s;
1008 }
1009}
1010
1011inline void Dequantizer::dequantize_asymmetric_int8_avx2(
1012 const uint8_t* in, float* out, uint32_t dim) const
1013{
1014 const __m256 vscale = _mm256_set1_ps(params_.scale);
1015 const __m256 vzp = _mm256_set1_ps(params_.zero_point);
1016
1017 uint32_t i = 0;
1018 for (; i + 8 <= dim; i += 8) {
1019 // Load 8 uint8 values, zero-extend to int32
1020 __m128i raw = _mm_loadl_epi64(reinterpret_cast<const __m128i*>(in + i));
1021 __m128i vu16 = _mm_cvtepu8_epi16(raw);
1022
1023 __m128i lo32 = _mm_cvtepu16_epi32(vu16);
1024 __m128i hi16 = _mm_unpackhi_epi64(vu16, vu16);
1025 __m128i hi32 = _mm_cvtepu16_epi32(hi16);
1026
1027 __m256i vu32 = _mm256_set_m128i(hi32, lo32);
1028 __m256 vf = _mm256_cvtepi32_ps(vu32);
1029
1030 // dequantized = value * scale + zero_point
1031 vf = _mm256_fmadd_ps(vf, vscale, vzp);
1032
1033 _mm256_storeu_ps(out + i, vf);
1034 }
1035
1036 const float s = params_.scale;
1037 const float zp = params_.zero_point;
1038 for (; i < dim; ++i) {
1039 out[i] = static_cast<float>(in[i]) * s + zp;
1040 }
1041}
1042
1043#elif defined(SIGNET_QUANT_SSE)
1044
1045inline void Dequantizer::dequantize_symmetric_int8_sse(
1046 const uint8_t* in, float* out, uint32_t dim) const
1047{
1048 const __m128 vscale = _mm_set1_ps(params_.scale);
1049
1050 uint32_t i = 0;
1051 for (; i + 4 <= dim; i += 4) {
1052 // Load 4 int8 values into the lowest 32 bits
1053 int32_t raw32;
1054 std::memcpy(&raw32, in + i, 4);
1055 __m128i raw = _mm_cvtsi32_si128(raw32);
1056
1057 // Sign-extend int8 -> int16 (SSE2: unpacklo with zero, then fix sign)
1058 __m128i vi16 = _mm_unpacklo_epi8(raw, raw); // duplicate bytes
1059 vi16 = _mm_srai_epi16(vi16, 8); // arithmetic shift right by 8
1060
1061 // Sign-extend int16 -> int32 (SSE2: unpacklo with sign-extension)
1062 __m128i sign = _mm_srai_epi16(vi16, 15); // all-1s or all-0s per lane
1063 __m128i vi32 = _mm_unpacklo_epi16(vi16, sign);
1064
1065 __m128 vf = _mm_cvtepi32_ps(vi32);
1066 vf = _mm_mul_ps(vf, vscale);
1067
1068 _mm_storeu_ps(out + i, vf);
1069 }
1070
1071 const float s = params_.scale;
1072 for (; i < dim; ++i) {
1073 int8_t q = static_cast<int8_t>(in[i]);
1074 out[i] = static_cast<float>(q) * s;
1075 }
1076}
1077
1078inline void Dequantizer::dequantize_asymmetric_int8_sse(
1079 const uint8_t* in, float* out, uint32_t dim) const
1080{
1081 const __m128 vscale = _mm_set1_ps(params_.scale);
1082 const __m128 vzp = _mm_set1_ps(params_.zero_point);
1083 const __m128i vzero = _mm_setzero_si128();
1084
1085 uint32_t i = 0;
1086 for (; i + 4 <= dim; i += 4) {
1087 int32_t raw32;
1088 std::memcpy(&raw32, in + i, 4);
1089 __m128i raw = _mm_cvtsi32_si128(raw32);
1090
1091 // Zero-extend uint8 -> uint16 -> int32 (SSE2)
1092 __m128i vu16 = _mm_unpacklo_epi8(raw, vzero);
1093 __m128i vu32 = _mm_unpacklo_epi16(vu16, vzero);
1094
1095 __m128 vf = _mm_cvtepi32_ps(vu32);
1096 // dequantized = value * scale + zero_point
1097 vf = _mm_add_ps(_mm_mul_ps(vf, vscale), vzp);
1098
1099 _mm_storeu_ps(out + i, vf);
1100 }
1101
1102 const float s = params_.scale;
1103 const float zp = params_.zero_point;
1104 for (; i < dim; ++i) {
1105 out[i] = static_cast<float>(in[i]) * s + zp;
1106 }
1107}
1108
1109#elif defined(SIGNET_QUANT_NEON)
1110
inline void Dequantizer::dequantize_symmetric_int8_neon(
    const uint8_t* in, float* out, uint32_t dim) const
{
    // Symmetric INT8: float = q * scale, q in [-127, 127].
    const float32x4_t vscale = vdupq_n_f32(params_.scale);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        // Load 4 int8 values. memcpy avoids the strict-aliasing/alignment UB
        // of dereferencing `in + i` through a reinterpret_cast'ed uint32_t*
        // (and matches how the SSE paths load their 4-byte groups).
        uint32_t raw32;
        std::memcpy(&raw32, in + i, 4);
        int8x8_t raw8 = vreinterpret_s8_u32(vdup_n_u32(raw32));

        // Widen int8 -> int16 -> int32; only the low 4 lanes carry real data,
        // the high lanes are duplicates and are discarded by vget_low.
        int16x8_t vi16 = vmovl_s8(raw8);
        int32x4_t vi32 = vmovl_s16(vget_low_s16(vi16));

        float32x4_t vf = vcvtq_f32_s32(vi32);
        vf = vmulq_f32(vf, vscale);

        vst1q_f32(out + i, vf);
    }

    // Scalar tail (dim not a multiple of 4).
    const float s = params_.scale;
    for (; i < dim; ++i) {
        int8_t q = static_cast<int8_t>(in[i]);
        out[i] = static_cast<float>(q) * s;
    }
}
1138
inline void Dequantizer::dequantize_asymmetric_int8_neon(
    const uint8_t* in, float* out, uint32_t dim) const
{
    // Asymmetric INT8: float = q * scale + zero_point, q in [0, 255].
    const float32x4_t vscale = vdupq_n_f32(params_.scale);
    const float32x4_t vzp = vdupq_n_f32(params_.zero_point);

    uint32_t i = 0;
    for (; i + 4 <= dim; i += 4) {
        // Load 4 bytes via memcpy: dereferencing `in + i` through a
        // reinterpret_cast'ed uint32_t* is strict-aliasing/alignment UB.
        // (Consistent with the SSE paths, which already use memcpy.)
        uint32_t raw32;
        std::memcpy(&raw32, in + i, 4);
        uint8x8_t raw8 = vreinterpret_u8_u32(vdup_n_u32(raw32));

        // Zero-extend uint8 -> uint16 -> uint32 (low 4 lanes used).
        uint16x8_t vu16 = vmovl_u8(raw8);
        uint32x4_t vu32 = vmovl_u16(vget_low_u16(vu16));

        float32x4_t vf = vcvtq_f32_u32(vu32);
        vf = vmlaq_f32(vzp, vf, vscale); // vf = vf * scale + zero_point

        vst1q_f32(out + i, vf);
    }

    // Scalar tail (dim not a multiple of 4).
    const float s = params_.scale;
    const float zp = params_.zero_point;
    for (; i < dim; ++i) {
        out[i] = static_cast<float>(in[i]) * s + zp;
    }
}
1165
1166#endif // SIMD dequantize helpers
1167
1168
1169// ---------------------------------------------------------------------------
1170// Dequantizer::dequantize -- dispatch to best available path
1171// ---------------------------------------------------------------------------
1172inline void Dequantizer::dequantize(const uint8_t* input, float* output) const {
1173 const uint32_t dim = params_.dimension;
1174
1175 switch (params_.scheme) {
1177#if defined(SIGNET_QUANT_AVX2)
1178 dequantize_symmetric_int8_avx2(input, output, dim);
1179#elif defined(SIGNET_QUANT_SSE)
1180 dequantize_symmetric_int8_sse(input, output, dim);
1181#elif defined(SIGNET_QUANT_NEON)
1182 dequantize_symmetric_int8_neon(input, output, dim);
1183#else
1184 dequantize_symmetric_int8_scalar(input, output, dim);
1185#endif
1186 break;
1187
1189#if defined(SIGNET_QUANT_AVX2)
1190 dequantize_asymmetric_int8_avx2(input, output, dim);
1191#elif defined(SIGNET_QUANT_SSE)
1192 dequantize_asymmetric_int8_sse(input, output, dim);
1193#elif defined(SIGNET_QUANT_NEON)
1194 dequantize_asymmetric_int8_neon(input, output, dim);
1195#else
1196 dequantize_asymmetric_int8_scalar(input, output, dim);
1197#endif
1198 break;
1199
1201 dequantize_symmetric_int4_scalar(input, output, dim);
1202 break;
1203 }
1204}
1205
1206inline std::vector<std::vector<float>> Dequantizer::dequantize_batch(
1207 const uint8_t* input, size_t num_vectors) const
1208{
1209 const size_t bpv = params_.bytes_per_vector();
1210 const uint32_t dim = params_.dimension;
1211
1212 std::vector<std::vector<float>> result(num_vectors);
1213
1214 for (size_t v = 0; v < num_vectors; ++v) {
1215 result[v].resize(dim);
1216 dequantize(input + v * bpv, result[v].data());
1217 }
1218
1219 return result;
1220}
1221
1222inline std::vector<float> Dequantizer::dequantize_flat(
1223 const uint8_t* input, size_t num_vectors) const
1224{
1225 const size_t bpv = params_.bytes_per_vector();
1226 const uint32_t dim = params_.dimension;
1227
1228 std::vector<float> result(num_vectors * dim);
1229
1230 for (size_t v = 0; v < num_vectors; ++v) {
1231 dequantize(input + v * bpv, result.data() + v * dim);
1232 }
1233
1234 return result;
1235}
1236
1237
1238// ===========================================================================
1239//
1240// IMPLEMENTATION -- QuantizedVectorWriter
1241//
1242// ===========================================================================
1243
1244inline void QuantizedVectorWriter::add(const float* data) {
1245 const size_t bpv = quantizer_.params().bytes_per_vector();
1246 const size_t offset = buf_.size();
1247 buf_.resize(offset + bpv);
1248 quantizer_.quantize(data, buf_.data() + offset);
1249 ++num_vectors_;
1250}
1251
1252inline void QuantizedVectorWriter::add_raw(const uint8_t* data) {
1253 const size_t bpv = quantizer_.params().bytes_per_vector();
1254 buf_.insert(buf_.end(), data, data + bpv);
1255 ++num_vectors_;
1256}
1257
1258inline void QuantizedVectorWriter::add_batch(const float* data, size_t num_vectors) {
1259 const uint32_t dim = quantizer_.params().dimension;
1260 const size_t bpv = quantizer_.params().bytes_per_vector();
1261 const size_t offset = buf_.size();
1262 buf_.resize(offset + num_vectors * bpv);
1263
1264 for (size_t v = 0; v < num_vectors; ++v) {
1265 quantizer_.quantize(data + v * dim,
1266 buf_.data() + offset + v * bpv);
1267 }
1268
1269 num_vectors_ += num_vectors;
1270}
1271
1272inline std::vector<uint8_t> QuantizedVectorWriter::flush() {
1273 std::vector<uint8_t> result = std::move(buf_);
1274 buf_.clear();
1275 num_vectors_ = 0;
1276 return result;
1277}
1278
1280 const std::string& name,
1281 const QuantizationParams& params)
1282{
1284 cd.name = name;
1288 cd.type_length = static_cast<int32_t>(params.bytes_per_vector());
1289 return cd;
1290}
1291
1292
1293// ===========================================================================
1294//
1295// IMPLEMENTATION -- QuantizedVectorReader
1296//
1297// ===========================================================================
1298
1299inline std::vector<std::vector<float>> QuantizedVectorReader::read_page(
1300 const uint8_t* data, size_t data_size)
1301{
1302 const size_t bpv = params_.bytes_per_vector();
1303 if (bpv == 0) return {};
1304
1305 if (data_size % bpv != 0) {
1306 return {};
1307 }
1308
1309 const size_t num = data_size / bpv;
1310 return dequantizer_.dequantize_batch(data, num);
1311}
1312
1313inline std::vector<float> QuantizedVectorReader::read_vector(
1314 const uint8_t* page_data, size_t page_size, size_t index)
1315{
1316 const size_t bpv = params_.bytes_per_vector();
1317 const size_t offset = index * bpv;
1318
1319 std::vector<float> result(params_.dimension);
1320
1321 if (offset + bpv > page_size) {
1322 // Out of bounds -- return zero vector
1323 return result;
1324 }
1325
1326 dequantizer_.dequantize(page_data + offset, result.data());
1327 return result;
1328}
1329
1331 const uint8_t* data, size_t data_size)
1332{
1333 const size_t bpv = params_.bytes_per_vector();
1334 if (bpv == 0) {
1336 "quantized vector: bytes_per_vector is zero"};
1337 }
1338
1339 if (data_size % bpv != 0) {
1341 "quantized vector page size is not a multiple of bytes_per_vector"};
1342 }
1343
1344 RawResult r;
1345 r.data = data;
1346 r.num_vectors = data_size / bpv;
1347 return r;
1348}
1349
1350} // namespace signet::forge
Dequantizes INT8/INT4 quantized vectors back to float32.
void dequantize(const uint8_t *input, float *output) const
Dequantize a single quantized vector to float32.
uint64_t anomaly_count() const
EU AI Act Art.12 anomaly tracking: number of dequantized values that fell outside the representable quantized range.
Dequantizer(QuantizationParams params)
Construct a dequantizer with the given parameters.
std::vector< float > dequantize_flat(const uint8_t *input, size_t num_vectors) const
Flat batch dequantize: returns all floats in one contiguous buffer.
const QuantizationParams & params() const
Access the quantization parameters.
std::vector< std::vector< float > > dequantize_batch(const uint8_t *input, size_t num_vectors) const
Dequantize a batch of vectors, returning a vector-of-vectors.
Reads quantized page data (FIXED_LEN_BYTE_ARRAY) and dequantizes to float32 on demand.
expected< RawResult > read_raw(const uint8_t *data, size_t data_size)
Read raw quantized bytes without dequantization.
std::vector< float > read_vector(const uint8_t *page_data, size_t page_size, size_t index)
Read and dequantize a single vector by index within the page.
std::vector< std::vector< float > > read_page(const uint8_t *data, size_t data_size)
Read an entire page and dequantize all vectors to float32.
QuantizedVectorReader(QuantizationParams params)
Construct a reader with the given quantization parameters.
Accumulates float32 vectors, quantizes them, and produces FIXED_LEN_BYTE_ARRAY page data suitable for...
size_t num_vectors() const
Number of vectors currently buffered.
std::vector< uint8_t > flush()
Flush accumulated data as FIXED_LEN_BYTE_ARRAY page bytes.
static ColumnDescriptor make_descriptor(const std::string &name, const QuantizationParams &params)
Create a ColumnDescriptor suitable for a quantized vector column.
QuantizedVectorWriter(QuantizationParams params)
Construct a writer with the given quantization parameters.
void add(const float *data)
Add a single float32 vector (quantized internally).
void add_batch(const float *data, size_t num_vectors)
Add a batch of float32 vectors (quantized internally).
void add_raw(const uint8_t *data)
Add pre-quantized raw bytes for one vector.
const QuantizationParams & params() const
Access the quantization parameters.
Quantizes float32 vectors to INT8 or INT4 representation.
std::vector< uint8_t > quantize_batch(const float *input, size_t num_vectors) const
Quantize a batch of vectors into a flat buffer of quantized bytes.
Quantizer(QuantizationParams params)
Construct a quantizer with the given parameters.
const QuantizationParams & params() const
Access the quantization parameters.
void quantize(const float *input, uint8_t *output) const
Quantize a single float32 vector into the output buffer.
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
@ FIXED_LEN_BYTE_ARRAY
Fixed-length byte array (UUID, vectors, decimals).
@ FLOAT32_VECTOR
ML embedding vector — FIXED_LEN_BYTE_ARRAY(dim*4).
@ INVALID_FILE
The file is not a valid Parquet file (e.g. missing or wrong magic bytes).
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.
@ CORRUPT_PAGE
A data page failed integrity checks (bad CRC, truncated, or exceeds size limits).
QuantizationScheme
Identifies the quantization method used for vector compression.
@ SYMMETRIC_INT8
value = round(float / scale), range [-127, 127].
@ SYMMETRIC_INT4
value = round(float / scale), range [-7, 7], nibble-packed.
@ ASYMMETRIC_INT8
value = round((float - zero_point) / scale), range [0, 255].
@ REQUIRED
Exactly one value per row (non-nullable).
Descriptor for a single column in a Parquet schema.
Definition types.hpp:152
int32_t type_length
Byte length for FIXED_LEN_BYTE_ARRAY columns (-1 = N/A).
Definition types.hpp:157
LogicalType logical_type
Semantic annotation (STRING, TIMESTAMP_NS, etc.).
Definition types.hpp:155
Repetition repetition
Nullability / cardinality.
Definition types.hpp:156
std::string name
Column name (unique within a schema).
Definition types.hpp:153
PhysicalType physical_type
On-disk storage type.
Definition types.hpp:154
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:101
Parameters that fully describe a quantization mapping.
static QuantizationParams compute(const float *data, size_t num_vectors, uint32_t dim, QuantizationScheme scheme)
Compute optimal quantization parameters from a batch of vectors.
std::string serialize() const
Serialize to a compact key-value string for Parquet metadata.
float scale
Scale factor (float units per quantization step).
size_t bytes_per_vector() const
Storage size per vector in bytes.
static expected< QuantizationParams > deserialize(const std::string &s)
Deserialize from the key-value string produced by serialize().
QuantizationScheme scheme
Quantization scheme (symmetric/asymmetric, INT8/INT4).
uint32_t dimension
Vector dimension (number of float elements).
float zero_point
Offset (used by ASYMMETRIC_INT8 only).
Result of a raw (non-dequantized) page read.
size_t num_vectors
Number of vectors in the page.
const uint8_t * data
Pointer to quantized byte data (not owned).
Parquet format enumerations, type traits, and statistics structs.