41 #include <immintrin.h>
42 #define SIGNET_QUANT_AVX2 1
43#elif defined(__SSE4_2__) || defined(__SSE4_1__) || defined(__SSE2__)
44 #include <immintrin.h>
45 #define SIGNET_QUANT_SSE 1
46#elif defined(__ARM_NEON) || defined(__ARM_NEON__)
48 #define SIGNET_QUANT_NEON 1
103 [[nodiscard]]
inline std::string
serialize()
const;
// NOTE(review): fragmentary listing — the constructor signature line and the
// closing brace were not captured here.
// Member-init list: takes ownership of the params and precomputes 1/scale so
// the hot quantize loops can multiply instead of divide.
 136     : params_(std::move(
params))
 137     , inv_scale_((std::isfinite(params_.scale) && params_.scale > 0.0f)
 138                      ? 1.0f / params_.scale
// Defensive repair: a non-finite or non-positive scale is reset to 1.0f so
// later arithmetic stays well-defined (mirrors the inv_scale_ guard above).
 140     if (!std::isfinite(params_.
scale) || params_.
scale <= 0.0f) {
 141         params_.
scale = 1.0f;
153 inline void quantize(
const float* input, uint8_t* output)
const;
161 const float* input,
size_t num_vectors)
const;
171 inline void quantize_symmetric_int8_scalar(
const float* in, uint8_t* out, uint32_t dim)
const;
172 inline void quantize_asymmetric_int8_scalar(
const float* in, uint8_t* out, uint32_t dim)
const;
173 inline void quantize_symmetric_int4_scalar(
const float* in, uint8_t* out, uint32_t dim)
const;
176#if defined(SIGNET_QUANT_AVX2)
177 inline void quantize_symmetric_int8_avx2(
const float* in, uint8_t* out, uint32_t dim)
const;
178 inline void quantize_asymmetric_int8_avx2(
const float* in, uint8_t* out, uint32_t dim)
const;
179#elif defined(SIGNET_QUANT_SSE)
180 inline void quantize_symmetric_int8_sse(
const float* in, uint8_t* out, uint32_t dim)
const;
181 inline void quantize_asymmetric_int8_sse(
const float* in, uint8_t* out, uint32_t dim)
const;
182#elif defined(SIGNET_QUANT_NEON)
183 inline void quantize_symmetric_int8_neon(
const float* in, uint8_t* out, uint32_t dim)
const;
184 inline void quantize_asymmetric_int8_neon(
const float* in, uint8_t* out, uint32_t dim)
const;
202 : params_(std::move(
params)) {}
208 inline void dequantize(
const uint8_t* input,
float* output)
const;
216 const uint8_t* input,
size_t num_vectors)
const;
224 const uint8_t* input,
size_t num_vectors)
const;
231 [[nodiscard]] uint64_t
anomaly_count()
const {
return anomaly_count_.load(std::memory_order_relaxed); }
235 mutable std::atomic<uint64_t> anomaly_count_{0};
238 inline void dequantize_symmetric_int8_scalar(
const uint8_t* in,
float* out, uint32_t dim)
const;
239 inline void dequantize_asymmetric_int8_scalar(
const uint8_t* in,
float* out, uint32_t dim)
const;
240 inline void dequantize_symmetric_int4_scalar(
const uint8_t* in,
float* out, uint32_t dim)
const;
243#if defined(SIGNET_QUANT_AVX2)
244 inline void dequantize_symmetric_int8_avx2(
const uint8_t* in,
float* out, uint32_t dim)
const;
245 inline void dequantize_asymmetric_int8_avx2(
const uint8_t* in,
float* out, uint32_t dim)
const;
246#elif defined(SIGNET_QUANT_SSE)
247 inline void dequantize_symmetric_int8_sse(
const uint8_t* in,
float* out, uint32_t dim)
const;
248 inline void dequantize_asymmetric_int8_sse(
const uint8_t* in,
float* out, uint32_t dim)
const;
249#elif defined(SIGNET_QUANT_NEON)
250 inline void dequantize_symmetric_int8_neon(
const uint8_t* in,
float* out, uint32_t dim)
const;
251 inline void dequantize_asymmetric_int8_neon(
const uint8_t* in,
float* out, uint32_t dim)
const;
268 : quantizer_(
params), num_vectors_(0) {}
272 inline void add(
const float* data);
276 inline void add_raw(
const uint8_t* data);
288 [[nodiscard]]
inline std::vector<uint8_t>
flush();
302 const std::string& name,
307 return quantizer_.
params();
312 std::vector<uint8_t> buf_;
329 : params_(std::move(params)), dequantizer_(params_) {}
335 [[nodiscard]]
inline std::vector<std::vector<float>>
read_page(
336 const uint8_t* data,
size_t data_size);
343 [[nodiscard]]
inline std::vector<float>
read_vector(
344 const uint8_t* page_data,
size_t page_size,
size_t index);
358 const uint8_t* data,
size_t data_size);
// Fragment of QuantizationParams::compute (several original lines are
// missing from this listing, including the switch/case scaffolding).
// Guard: empty input — presumably returns default params (TODO confirm;
// the body of this branch is not captured here).
 382     if (num_vectors == 0 || dim == 0) {
 388     const size_t total = num_vectors *
static_cast<size_t>(dim);
// Single pass over all elements to find the finite min/max; NaN and Inf
// samples are skipped so they cannot poison the derived scale.
 392     float vmin = (std::numeric_limits<float>::max)();
 393     float vmax = (std::numeric_limits<float>::lowest)();
 394     for (
size_t i = 0; i < total; ++i) {
 395         const float v = data[i];
 396         if (!std::isfinite(v))
continue;
 397         if (v < vmin) vmin = v;
 398         if (v > vmax) vmax = v;
// SYMMETRIC_INT8: scale maps the largest magnitude onto +/-127; an all-zero
// input degrades to scale = 1.0f so later division stays safe.
 410             const float abs_max = (std::max)(std::fabs(vmin), std::fabs(vmax));
 411             p.
scale = (abs_max > 0.0f) ? (abs_max / 127.0f) : 1.0f;
// ASYMMETRIC_INT8: scale spreads [vmin, vmax] over the 256 unsigned levels
// (zero_point assignment is in lines missing from this listing).
 417             const float range = vmax - vmin;
 418             p.
scale = (range > 0.0f) ? (range / 255.0f) : 1.0f;
// SYMMETRIC_INT4: same idea as INT8 but the nibble range is +/-7.
 424             const float abs_max = (std::max)(std::fabs(vmin), std::fabs(vmax));
 425             p.
scale = (abs_max > 0.0f) ? (abs_max / 7.0f) : 1.0f;
// Fragment of QuantizationParams::serialize — builds the key=value string
// consumed by deserialize().
// Scheme is written as its underlying integer value.
 444     s += std::to_string(
static_cast<int32_t
>(
scheme));
// "%.9g": nine significant digits are sufficient to round-trip any IEEE-754
// binary32 value through text, so deserialize() recovers scale exactly.
 446     {
char buf[32]; std::snprintf(buf,
sizeof(buf),
"%.9g",
static_cast<double>(
scale)); s += buf; }
// zero_point is emitted with the same round-trippable format.
 448     {
char buf[32]; std::snprintf(buf,
sizeof(buf),
"%.9g",
static_cast<double>(
zero_point)); s += buf; }
// Fragment of QuantizationParams::deserialize — parses the "key=value;"
// string produced by serialize(), tracking which required fields appeared.
 458     bool got_scheme =
false, got_scale =
false, got_dim =
false;
// Scan key=value pairs; a missing '=' terminates the loop.
 461     while (pos < s.size()) {
 463         size_t eq = s.find(
'=', pos);
 464         if (eq == std::string::npos)
break;
 466         std::string key = s.substr(pos, eq - pos);
// Value runs to the next ';' or, for the last pair, to end-of-string.
 469         size_t semi = s.find(
';', eq + 1);
 471         if (semi == std::string::npos) {
 472             val = s.substr(eq + 1);
 475             val = s.substr(eq + 1, semi - eq - 1);
// "scheme": integer in [0, 2] (the three QuantizationScheme values);
// std::stoi exceptions are surfaced as a malformed-scheme error.
 479         if (key ==
"scheme") {
 481                 int32_t v = std::stoi(val);
 482                 if (v < 0 || v > 2) {
 484                         "quantization params: invalid scheme value"};
 490                     "quantization params: malformed scheme"};
 492         }
else if (key ==
"scale") {
 494                 p.
scale = std::stof(val);
 498                     "quantization params: malformed scale"};
 500         }
else if (key ==
"zero_point") {
 505                     "quantization params: malformed zero_point"};
 507         }
else if (key ==
"dimension") {
 509                 int v = std::stoi(val);
 512                         "quantization params: dimension must be positive"};
 518                     "quantization params: malformed dimension"};
// Post-parse validation: scheme, scale and dimension are mandatory
// (zero_point may default — presumably to 0; TODO confirm).
 524     if (!got_scheme || !got_scale || !got_dim) {
 526         "quantization params: missing required field(s)"};
// Same scale invariant the Quantizer constructor enforces.
 528     if (!std::isfinite(p.
scale) || p.
scale <= 0.0f) {
 530         "quantization params: scale must be finite and positive"};
 534         "quantization params: zero_point must be finite"};
// INT4 case of bytes_per_vector(): two nibbles per byte, rounded up so an
// odd dimension still gets a whole trailing byte.
 546     return (
static_cast<size_t>(
dimension) + 1) / 2;
558inline void Quantizer::quantize_symmetric_int8_scalar(
559 const float* in, uint8_t* out, uint32_t dim)
const
561 for (uint32_t i = 0; i < dim; ++i) {
562 float scaled = std::nearbyintf(in[i] * inv_scale_);
563 int32_t q =
static_cast<int32_t
>(scaled);
564 q = std::clamp(q, -127, 127);
566 out[i] =
static_cast<uint8_t
>(
static_cast<int8_t
>(q));
570inline void Quantizer::quantize_asymmetric_int8_scalar(
571 const float* in, uint8_t* out, uint32_t dim)
const
574 for (uint32_t i = 0; i < dim; ++i) {
575 float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
576 int32_t q =
static_cast<int32_t
>(scaled);
577 q = std::clamp(q, 0, 255);
578 out[i] =
static_cast<uint8_t
>(q);
// Symmetric INT4 kernel: q = round(x / scale) clamped to [-7, 7], packed
// two values per output byte. Fragmentary listing: the parity test that
// selects high vs low nibble (original lines 596-601) is missing here.
582inline void Quantizer::quantize_symmetric_int4_scalar(
 583     const float* in, uint8_t* out, uint32_t dim)
const
// Zero the whole packed buffer up front so the |= stores below only have to
// set bits; (dim + 1) / 2 rounds up for odd dimensions.
 585     const size_t packed_len = (
static_cast<size_t>(dim) + 1) / 2;
 586     std::memset(out, 0, packed_len);
 588     for (uint32_t i = 0; i < dim; ++i) {
 589         float scaled = std::nearbyintf(in[i] * inv_scale_);
 590         int32_t q =
static_cast<int32_t
>(scaled);
 591         q = std::clamp(q, -7, 7);
// Keep only the low 4 bits — the two's-complement nibble encoding of q.
 594         uint8_t nibble =
static_cast<uint8_t
>(q & 0x0F);
 595         uint32_t byte_idx = i / 2;
// NOTE(review): the branch condition choosing which element parity lands in
// the high nibble is not captured in this listing — confirm the layout
// against dequantize_symmetric_int4_scalar before relying on byte order.
 599             out[byte_idx] |=
static_cast<uint8_t
>(nibble << 4);
 602             out[byte_idx] |= nibble;
 614#if defined(SIGNET_QUANT_AVX2)
// AVX2 symmetric INT8 kernel: 8 floats per iteration. Fragmentary listing —
// the declarations of the loop index and of `packed` (original lines
// 622-623, 640-643) are missing here.
 616inline void Quantizer::quantize_symmetric_int8_avx2(
 617     const float* in, uint8_t* out, uint32_t dim)
const
 619     const __m256 vscale = _mm256_set1_ps(inv_scale_);
 620     const __m256 vmin = _mm256_set1_ps(-127.0f);
 621     const __m256 vmax = _mm256_set1_ps(127.0f);
 624     for (; i + 8 <= dim; i += 8) {
 625         __m256 vf = _mm256_loadu_ps(in + i);
 626         vf = _mm256_mul_ps(vf, vscale);
// Explicit round-to-nearest before conversion, then clamp in the float
// domain so the later saturating packs never see out-of-range lanes.
 627         vf = _mm256_round_ps(vf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 628         vf = _mm256_max_ps(vf, vmin);
 629         vf = _mm256_min_ps(vf, vmax);
 632         __m256i vi32 = _mm256_cvtps_epi32(vf);
// Narrow 8x int32 -> 8x int16 -> 8x int8 via signed saturating packs; the
// two 128-bit halves are recombined before packing.
 636         __m128i lo = _mm256_castsi256_si128(vi32);
 637         __m128i hi = _mm256_extracti128_si256(vi32, 1);
 638         __m128i vi16 = _mm_packs_epi32(lo, hi);
 639         __m128i vi8 = _mm_packs_epi16(vi16, vi16);
// memcpy avoids any unaligned/strict-aliasing issues on the 8-byte store.
 644         std::memcpy(&packed, &vi8, 8);
 645         std::memcpy(out + i, &packed, 8);
// Scalar tail for the trailing dim % 8 elements (mirrors the scalar kernel).
 649     for (; i < dim; ++i) {
 650         float scaled = std::nearbyintf(in[i] * inv_scale_);
 651         int32_t q =
static_cast<int32_t
>(scaled);
 652         q = std::clamp(q, -127, 127);
 653         out[i] =
static_cast<uint8_t
>(
static_cast<int8_t
>(q));
// AVX2 asymmetric INT8 kernel: subtracts the broadcast zero_point, then the
// same round/clamp/pack pipeline but with unsigned saturating packs.
 657inline void Quantizer::quantize_asymmetric_int8_avx2(
 658     const float* in, uint8_t* out, uint32_t dim)
const
 660     const __m256 vscale = _mm256_set1_ps(inv_scale_);
 661     const __m256 vzp = _mm256_set1_ps(params_.
zero_point);
 662     const __m256 vmin = _mm256_set1_ps(0.0f);
 663     const __m256 vmax = _mm256_set1_ps(255.0f);
 666     for (; i + 8 <= dim; i += 8) {
 667         __m256 vf = _mm256_loadu_ps(in + i);
 668         vf = _mm256_sub_ps(vf, vzp);
 669         vf = _mm256_mul_ps(vf, vscale);
 670         vf = _mm256_round_ps(vf, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
 671         vf = _mm256_max_ps(vf, vmin);
 672         vf = _mm256_min_ps(vf, vmax);
 674         __m256i vi32 = _mm256_cvtps_epi32(vf);
 677         __m128i lo = _mm256_castsi256_si128(vi32);
 678         __m128i hi = _mm256_extracti128_si256(vi32, 1);
// packus: unsigned saturation to u16 then u8; inputs are already in
// [0, 255] thanks to the float-domain clamp above.
 679         __m128i vi16 = _mm_packus_epi32(lo, hi);
 680         __m128i vi8 = _mm_packus_epi16(vi16, vi16);
 683         std::memcpy(&packed, &vi8, 8);
 684         std::memcpy(out + i, &packed, 8);
// Scalar tail (`zp` is declared in lines missing from this listing).
 689     for (; i < dim; ++i) {
 690         float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
 691         int32_t q =
static_cast<int32_t
>(scaled);
 692         q = std::clamp(q, 0, 255);
 693         out[i] =
static_cast<uint8_t
>(q);
 697#elif defined(SIGNET_QUANT_SSE)
// SSE symmetric INT8 kernel: 4 floats per iteration. Fragmentary listing —
// loop-index and `packed` declarations are missing; note there is no
// explicit round here (original line 710 is missing — presumably a comment
// noting that _mm_cvtps_epi32 itself rounds per MXCSR, round-to-nearest by
// default; TODO confirm).
 699inline void Quantizer::quantize_symmetric_int8_sse(
 700     const float* in, uint8_t* out, uint32_t dim)
const
 702     const __m128 vscale = _mm_set1_ps(inv_scale_);
 703     const __m128 vmin = _mm_set1_ps(-127.0f);
 704     const __m128 vmax = _mm_set1_ps(127.0f);
 707     for (; i + 4 <= dim; i += 4) {
 708         __m128 vf = _mm_loadu_ps(in + i);
 709         vf = _mm_mul_ps(vf, vscale);
 711         vf = _mm_max_ps(vf, vmin);
 712         vf = _mm_min_ps(vf, vmax);
// Convert, then narrow 32 -> 16 -> 8 with signed saturating packs.
 714         __m128i vi32 = _mm_cvtps_epi32(vf);
 715         __m128i vi16 = _mm_packs_epi32(vi32, vi32);
 716         __m128i vi8 = _mm_packs_epi16(vi16, vi16);
 719         std::memcpy(&packed, &vi8, 4);
 720         std::memcpy(out + i, &packed, 4);
// Scalar tail (mirrors the scalar kernel exactly).
 723     for (; i < dim; ++i) {
 724         float scaled = std::nearbyintf(in[i] * inv_scale_);
 725         int32_t q =
static_cast<int32_t
>(scaled);
 726         q = std::clamp(q, -127, 127);
 727         out[i] =
static_cast<uint8_t
>(
static_cast<int8_t
>(q));
// SSE asymmetric INT8 kernel: subtract zero_point, scale, clamp to
// [0, 255], then pack. The signed packs_epi32 is safe because clamped
// values fit in int16; the final packus_epi16 restores unsigned bytes.
 731inline void Quantizer::quantize_asymmetric_int8_sse(
 732     const float* in, uint8_t* out, uint32_t dim)
const
 734     const __m128 vscale = _mm_set1_ps(inv_scale_);
 735     const __m128 vzp = _mm_set1_ps(params_.
zero_point);
 736     const __m128 vmin = _mm_set1_ps(0.0f);
 737     const __m128 vmax = _mm_set1_ps(255.0f);
 740     for (; i + 4 <= dim; i += 4) {
 741         __m128 vf = _mm_loadu_ps(in + i);
 742         vf = _mm_sub_ps(vf, vzp);
 743         vf = _mm_mul_ps(vf, vscale);
 745         vf = _mm_max_ps(vf, vmin);
 746         vf = _mm_min_ps(vf, vmax);
 748         __m128i vi32 = _mm_cvtps_epi32(vf);
 753         __m128i vi16 = _mm_packs_epi32(vi32, vi32);
 754         __m128i vi8 = _mm_packus_epi16(vi16, vi16);
 757         std::memcpy(&packed, &vi8, 4);
 758         std::memcpy(out + i, &packed, 4);
// Scalar tail (`zp` declared in lines missing from this listing).
 762     for (; i < dim; ++i) {
 763         float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
 764         int32_t q =
static_cast<int32_t
>(scaled);
 765         q = std::clamp(q, 0, 255);
 766         out[i] =
static_cast<uint8_t
>(q);
 770#elif defined(SIGNET_QUANT_NEON)
// NEON symmetric INT8 kernel: 4 floats per iteration. Fragmentary listing —
// original lines 781-783 are missing; vcvtq_s32_f32 truncates toward zero,
// so those lines presumably performed round-to-nearest (e.g. vrndnq_f32 on
// AArch64) before the clamp — TODO confirm against the full source.
 772inline void Quantizer::quantize_symmetric_int8_neon(
 773     const float* in, uint8_t* out, uint32_t dim)
const
 775     const float32x4_t vscale = vdupq_n_f32(inv_scale_);
 778     for (; i + 4 <= dim; i += 4) {
 779         float32x4_t vf = vld1q_f32(in + i);
 780         vf = vmulq_f32(vf, vscale);
 784         vf = vmaxq_f32(vf, vdupq_n_f32(-127.0f));
 785         vf = vminq_f32(vf, vdupq_n_f32(127.0f));
// Narrow s32 -> s16 -> s8 (values already clamped, so plain vmovn is safe).
 787         int32x4_t vi32 = vcvtq_s32_f32(vf);
 788         int16x4_t vi16 = vmovn_s32(vi32);
 789         int8x8_t vi8 = vmovn_s16(vcombine_s16(vi16, vi16));
// Store the low 4 bytes of the vector as one 32-bit lane.
 792         vst1_lane_u32(
reinterpret_cast<uint32_t*
>(out + i),
 793                       vreinterpret_u32_s8(vi8), 0);
// Scalar tail (mirrors the scalar kernel).
 796     for (; i < dim; ++i) {
 797         float scaled = std::nearbyintf(in[i] * inv_scale_);
 798         int32_t q =
static_cast<int32_t
>(scaled);
 799         q = std::clamp(q, -127, 127);
 800         out[i] =
static_cast<uint8_t
>(
static_cast<int8_t
>(q));
// NEON asymmetric INT8 kernel: subtract zero_point, scale, clamp to
// [0, 255], unsigned convert and narrow. Same missing-rounding caveat as
// the symmetric kernel above (original lines 815-817 absent).
 804inline void Quantizer::quantize_asymmetric_int8_neon(
 805     const float* in, uint8_t* out, uint32_t dim)
const
 807     const float32x4_t vscale = vdupq_n_f32(inv_scale_);
 808     const float32x4_t vzp = vdupq_n_f32(params_.
zero_point);
 811     for (; i + 4 <= dim; i += 4) {
 812         float32x4_t vf = vld1q_f32(in + i);
 813         vf = vsubq_f32(vf, vzp);
 814         vf = vmulq_f32(vf, vscale);
 818         vf = vmaxq_f32(vf, vdupq_n_f32(0.0f));
 819         vf = vminq_f32(vf, vdupq_n_f32(255.0f));
 821         uint32x4_t vu32 = vcvtq_u32_f32(vf);
 822         uint16x4_t vu16 = vmovn_u32(vu32);
 823         uint8x8_t vu8 = vmovn_u16(vcombine_u16(vu16, vu16));
 825         vst1_lane_u32(
reinterpret_cast<uint32_t*
>(out + i),
 826                       vreinterpret_u32_u8(vu8), 0);
// Scalar tail (`zp` declared in lines missing from this listing).
 830     for (; i < dim; ++i) {
 831         float scaled = std::nearbyintf((in[i] - zp) * inv_scale_);
 832         int32_t q =
static_cast<int32_t
>(scaled);
 833         q = std::clamp(q, 0, 255);
 834         out[i] =
static_cast<uint8_t
>(q);
// Fragment of Quantizer::quantize dispatch — per scheme, picks the best
// kernel compiled in (AVX2 > SSE > NEON > scalar). The surrounding switch
// and the #else/#endif lines are missing from this listing.
 849#if defined(SIGNET_QUANT_AVX2)
 850             quantize_symmetric_int8_avx2(input, output, dim);
 851#elif defined(SIGNET_QUANT_SSE)
 852             quantize_symmetric_int8_sse(input, output, dim);
 853#elif defined(SIGNET_QUANT_NEON)
 854             quantize_symmetric_int8_neon(input, output, dim);
 856             quantize_symmetric_int8_scalar(input, output, dim);
 861#if defined(SIGNET_QUANT_AVX2)
 862             quantize_asymmetric_int8_avx2(input, output, dim);
 863#elif defined(SIGNET_QUANT_SSE)
 864             quantize_asymmetric_int8_sse(input, output, dim);
 865#elif defined(SIGNET_QUANT_NEON)
 866             quantize_asymmetric_int8_neon(input, output, dim);
 868             quantize_asymmetric_int8_scalar(input, output, dim);
// INT4 has no SIMD variant; always the scalar nibble-packing kernel.
 874             quantize_symmetric_int4_scalar(input, output, dim);
// Fragment of Quantizer::quantize_batch — allocates one flat buffer of
// num_vectors * bytes_per_vector and quantizes each vector at its stride.
 880     const float* input,
size_t num_vectors)
const
 883     std::vector<uint8_t> result(num_vectors * bpv);
 885     for (
size_t v = 0; v < num_vectors; ++v) {
// The quantize(...) call line is missing from this listing; this is its
// second argument — the per-vector destination slot.
 887                  result.data() + v * bpv);
900inline void Dequantizer::dequantize_symmetric_int8_scalar(
901 const uint8_t* in,
float* out, uint32_t dim)
const
903 const float s = params_.
scale;
904 const float range_max = 127.0f * s;
905 const float range_min = -127.0f * s;
906 for (uint32_t i = 0; i < dim; ++i) {
907 int8_t q =
static_cast<int8_t
>(in[i]);
908 out[i] =
static_cast<float>(q) * s;
910 if (out[i] < range_min || out[i] > range_max) {
911 out[i] = std::clamp(out[i], range_min, range_max);
912 anomaly_count_.fetch_add(1, std::memory_order_relaxed);
917inline void Dequantizer::dequantize_asymmetric_int8_scalar(
918 const uint8_t* in,
float* out, uint32_t dim)
const
920 const float s = params_.
scale;
922 const float range_min = zp;
923 const float range_max = 255.0f * s + zp;
924 for (uint32_t i = 0; i < dim; ++i) {
925 out[i] =
static_cast<float>(in[i]) * s + zp;
927 if (out[i] < range_min || out[i] > range_max) {
928 out[i] = std::clamp(out[i], range_min, range_max);
929 anomaly_count_.fetch_add(1, std::memory_order_relaxed);
// Symmetric INT4 dequantize: unpacks two nibbles per byte, sign-extends and
// scales. Fragmentary listing: the parity test choosing high vs low nibble
// (original lines 942-948 area) is missing here — it must match the packing
// order used by quantize_symmetric_int4_scalar.
 934inline void Dequantizer::dequantize_symmetric_int4_scalar(
 935     const uint8_t* in,
float* out, uint32_t dim)
const
 937     const float s = params_.
scale;
 938     const float range_max = 7.0f * s;
 939     const float range_min = -7.0f * s;
 940     for (uint32_t i = 0; i < dim; ++i) {
 941         uint32_t byte_idx = i / 2;
 945             nibble = (in[byte_idx] >> 4) & 0x0F;
 948             nibble = in[byte_idx] & 0x0F;
// Branchless 4-bit sign extension: (n ^ 0x08) - 0x08 maps 0x8..0xF to
// -8..-1 and leaves 0x0..0x7 unchanged.
 954         int8_t signed_val =
static_cast<int8_t
>((nibble ^ 0x08) - 0x08);
 956         out[i] =
static_cast<float>(signed_val) * s;
// The encoding 0x8 (-8) lies outside the symmetric [-7, 7] range; it is
// clamped here and recorded as an anomaly.
 958         if (out[i] < range_min || out[i] > range_max) {
 959             out[i] = std::clamp(out[i], range_min, range_max);
 960             anomaly_count_.fetch_add(1, std::memory_order_relaxed);
 972#if defined(SIGNET_QUANT_AVX2)
// AVX2 symmetric INT8 dequantize: 8 bytes per iteration, widened
// s8 -> s16 -> s32, converted to float and scaled.
// NOTE(review): unlike the scalar kernel, neither this loop nor its tail
// clamps or increments anomaly_count_ — a raw 0x80 byte dequantizes to
// -128*scale here but is clamped (and counted) on the scalar path; confirm
// this divergence is intended.
 974inline void Dequantizer::dequantize_symmetric_int8_avx2(
 975     const uint8_t* in,
float* out, uint32_t dim)
const
 977     const __m256 vscale = _mm256_set1_ps(params_.
scale);
 980     for (; i + 8 <= dim; i += 8) {
 983         __m128i raw = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(in + i));
// Sign-extending widens, then recombine the two int32 halves into a 256-bit
// register (set_m128i takes the HIGH half first).
 986         __m128i vi16 = _mm_cvtepi8_epi16(raw);
 989         __m128i lo32 = _mm_cvtepi16_epi32(vi16);
 990         __m128i hi16 = _mm_unpackhi_epi64(vi16, vi16);
 991         __m128i hi32 = _mm_cvtepi16_epi32(hi16);
 994         __m256i vi32 = _mm256_set_m128i(hi32, lo32);
 997         __m256 vf = _mm256_cvtepi32_ps(vi32);
 998         vf = _mm256_mul_ps(vf, vscale);
 1000         _mm256_storeu_ps(out + i, vf);
// Scalar tail for the trailing dim % 8 elements.
 1004     const float s = params_.
scale;
 1005     for (; i < dim; ++i) {
 1006         int8_t q =
static_cast<int8_t
>(in[i]);
 1007         out[i] =
static_cast<float>(q) * s;
// AVX2 asymmetric INT8 dequantize: zero-extending widens, then a fused
// multiply-add applies scale and zero_point in one step.
 1011inline void Dequantizer::dequantize_asymmetric_int8_avx2(
 1012     const uint8_t* in,
float* out, uint32_t dim)
const
 1014     const __m256 vscale = _mm256_set1_ps(params_.
scale);
 1015     const __m256 vzp = _mm256_set1_ps(params_.
zero_point);
 1018     for (; i + 8 <= dim; i += 8) {
 1020         __m128i raw = _mm_loadl_epi64(
reinterpret_cast<const __m128i*
>(in + i));
 1021         __m128i vu16 = _mm_cvtepu8_epi16(raw);
 1023         __m128i lo32 = _mm_cvtepu16_epi32(vu16);
 1024         __m128i hi16 = _mm_unpackhi_epi64(vu16, vu16);
 1025         __m128i hi32 = _mm_cvtepu16_epi32(hi16);
 1027         __m256i vu32 = _mm256_set_m128i(hi32, lo32);
// Values are 0..255, so the signed epi32->ps conversion is exact.
 1028         __m256 vf = _mm256_cvtepi32_ps(vu32);
 1031         vf = _mm256_fmadd_ps(vf, vscale, vzp);
 1033         _mm256_storeu_ps(out + i, vf);
// Scalar tail (`zp` declared in lines missing from this listing).
 1036     const float s = params_.
scale;
 1038     for (; i < dim; ++i) {
 1039         out[i] =
static_cast<float>(in[i]) * s + zp;
 1043#elif defined(SIGNET_QUANT_SSE)
// SSE symmetric INT8 dequantize: 4 bytes per iteration. Like the AVX2 path,
// no clamp/anomaly accounting is done here (see scalar kernel).
 1045inline void Dequantizer::dequantize_symmetric_int8_sse(
 1046     const uint8_t* in,
float* out, uint32_t dim)
const
 1048     const __m128 vscale = _mm_set1_ps(params_.
scale);
 1051     for (; i + 4 <= dim; i += 4) {
// memcpy load of 4 bytes into the low lane (raw32 declared in a line
// missing from this listing).
 1054         std::memcpy(&raw32, in + i, 4);
 1055         __m128i raw = _mm_cvtsi32_si128(raw32);
// SSE2-only sign extension: duplicate each byte into both halves of a
// 16-bit lane, then arithmetic-shift right by 8.
 1058         __m128i vi16 = _mm_unpacklo_epi8(raw, raw);
 1059         vi16 = _mm_srai_epi16(vi16, 8);
// Widen s16 -> s32 by pairing each lane with its own sign bits.
 1062         __m128i sign = _mm_srai_epi16(vi16, 15);
 1063         __m128i vi32 = _mm_unpacklo_epi16(vi16, sign);
 1065         __m128 vf = _mm_cvtepi32_ps(vi32);
 1066         vf = _mm_mul_ps(vf, vscale);
 1068         _mm_storeu_ps(out + i, vf);
// Scalar tail.
 1071     const float s = params_.
scale;
 1072     for (; i < dim; ++i) {
 1073         int8_t q =
static_cast<int8_t
>(in[i]);
 1074         out[i] =
static_cast<float>(q) * s;
// SSE asymmetric INT8 dequantize: zero-extend via unpacks with a zero
// register, then value = u8 * scale + zero_point.
 1078inline void Dequantizer::dequantize_asymmetric_int8_sse(
 1079     const uint8_t* in,
float* out, uint32_t dim)
const
 1081     const __m128 vscale = _mm_set1_ps(params_.
scale);
 1082     const __m128 vzp = _mm_set1_ps(params_.
zero_point);
 1083     const __m128i vzero = _mm_setzero_si128();
 1086     for (; i + 4 <= dim; i += 4) {
 1088         std::memcpy(&raw32, in + i, 4);
 1089         __m128i raw = _mm_cvtsi32_si128(raw32);
 1092         __m128i vu16 = _mm_unpacklo_epi8(raw, vzero);
 1093         __m128i vu32 = _mm_unpacklo_epi16(vu16, vzero);
 1095         __m128 vf = _mm_cvtepi32_ps(vu32);
 1097         vf = _mm_add_ps(_mm_mul_ps(vf, vscale), vzp);
 1099         _mm_storeu_ps(out + i, vf);
// Scalar tail (`zp` declared in lines missing from this listing).
 1102     const float s = params_.
scale;
 1104     for (; i < dim; ++i) {
 1105         out[i] =
static_cast<float>(in[i]) * s + zp;
 1109#elif defined(SIGNET_QUANT_NEON)
// NEON symmetric INT8 dequantize: 4 bytes per iteration, widened
// s8 -> s16 -> s32, converted and scaled. As with the other SIMD paths, no
// clamp/anomaly accounting is performed here (scalar kernel does both).
 1111inline void Dequantizer::dequantize_symmetric_int8_neon(
 1112     const uint8_t* in,
float* out, uint32_t dim)
const
 1114     const float32x4_t vscale = vdupq_n_f32(params_.
scale);
 1117     for (; i + 4 <= dim; i += 4) {
// Load 4 bytes as one 32-bit lane, then reinterpret as 8 signed bytes
// (only the low 4 are meaningful).
 1119         int8x8_t raw8 = vreinterpret_s8_u32(
 1120             vld1_dup_u32(
reinterpret_cast<const uint32_t*
>(in + i)));
 1123         int16x8_t vi16 = vmovl_s8(raw8);
 1124         int32x4_t vi32 = vmovl_s16(vget_low_s16(vi16));
 1126         float32x4_t vf = vcvtq_f32_s32(vi32);
 1127         vf = vmulq_f32(vf, vscale);
 1129         vst1q_f32(out + i, vf);
// Scalar tail.
 1132     const float s = params_.
scale;
 1133     for (; i < dim; ++i) {
 1134         int8_t q =
static_cast<int8_t
>(in[i]);
 1135         out[i] =
static_cast<float>(q) * s;
// NEON asymmetric INT8 dequantize: unsigned widen, then vmlaq computes
// zero_point + u8 * scale in a single multiply-accumulate.
 1139inline void Dequantizer::dequantize_asymmetric_int8_neon(
 1140     const uint8_t* in,
float* out, uint32_t dim)
const
 1142     const float32x4_t vscale = vdupq_n_f32(params_.
scale);
 1143     const float32x4_t vzp = vdupq_n_f32(params_.
zero_point);
 1146     for (; i + 4 <= dim; i += 4) {
 1147         uint8x8_t raw8 = vreinterpret_u8_u32(
 1148             vld1_dup_u32(
reinterpret_cast<const uint32_t*
>(in + i)));
 1150         uint16x8_t vu16 = vmovl_u8(raw8);
 1151         uint32x4_t vu32 = vmovl_u16(vget_low_u16(vu16));
 1153         float32x4_t vf = vcvtq_f32_u32(vu32);
 1154         vf = vmlaq_f32(vzp, vf, vscale);
 1156         vst1q_f32(out + i, vf);
// Scalar tail (`zp` declared in lines missing from this listing).
 1159     const float s = params_.
scale;
 1161     for (; i < dim; ++i) {
 1162         out[i] =
static_cast<float>(in[i]) * s + zp;
// Fragment of Dequantizer::dequantize dispatch — selects the best compiled
// kernel per scheme (AVX2 > SSE > NEON > scalar); the case labels and
// #else/#endif lines are missing from this listing.
 1175     switch (params_.
scheme) {
 1177#if defined(SIGNET_QUANT_AVX2)
 1178             dequantize_symmetric_int8_avx2(input, output, dim);
 1179#elif defined(SIGNET_QUANT_SSE)
 1180             dequantize_symmetric_int8_sse(input, output, dim);
 1181#elif defined(SIGNET_QUANT_NEON)
 1182             dequantize_symmetric_int8_neon(input, output, dim);
 1184             dequantize_symmetric_int8_scalar(input, output, dim);
 1189#if defined(SIGNET_QUANT_AVX2)
 1190             dequantize_asymmetric_int8_avx2(input, output, dim);
 1191#elif defined(SIGNET_QUANT_SSE)
 1192             dequantize_asymmetric_int8_sse(input, output, dim);
 1193#elif defined(SIGNET_QUANT_NEON)
 1194             dequantize_asymmetric_int8_neon(input, output, dim);
 1196             dequantize_asymmetric_int8_scalar(input, output, dim);
// INT4 is always scalar (nibble unpacking has no SIMD variant).
 1201             dequantize_symmetric_int4_scalar(input, output, dim);
// Fragment of Dequantizer::dequantize_batch — one std::vector<float> per
// input vector, each dequantized from its bpv-strided slot.
 1207     const uint8_t* input,
size_t num_vectors)
const
 1212     std::vector<std::vector<float>> result(num_vectors);
 1214     for (
size_t v = 0; v < num_vectors; ++v) {
 1215         result[v].resize(dim);
 1216         dequantize(input + v * bpv, result[v].data());
// Fragment of Dequantizer::dequantize_flat — same walk but into a single
// contiguous num_vectors * dim float buffer (cheaper, one allocation).
 1223     const uint8_t* input,
size_t num_vectors)
const
 1228     std::vector<float> result(num_vectors * dim);
 1230     for (
size_t v = 0; v < num_vectors; ++v) {
 1231         dequantize(input + v * bpv, result.data() + v * dim);
// Fragments of QuantizedVectorWriter methods (surrounding signatures are
// missing from this listing).
// add(): grow the page buffer by one vector slot and quantize directly
// into it — no intermediate copy.
 1246     const size_t offset = buf_.size();
 1247     buf_.resize(offset + bpv);
 1248     quantizer_.
quantize(data, buf_.data() + offset);
// add_raw(): append bpv pre-quantized bytes verbatim.
 1254     buf_.insert(buf_.end(), data, data + bpv);
// add_batch(): reserve space once, then quantize each vector into its slot.
 1261     const size_t offset = buf_.size();
 1265         quantizer_.
quantize(data + v * dim,
 1266                  buf_.data() + offset + v * bpv);
// flush(): hand the accumulated page bytes to the caller by moving the
// buffer out (leaves buf_ valid-but-empty for reuse).
 1273     std::vector<uint8_t> result = std::move(buf_);
// Fragments of QuantizedVectorWriter::make_descriptor and the
// QuantizedVectorReader methods (signatures/braces missing from listing).
 1280     const std::string& name,
// read_page(): validate the page size, then dequantize every vector.
 1300     const uint8_t* data,
size_t data_size)
// bpv == 0 would make the modulo below divide by zero — bail out early.
 1303     if (bpv == 0)
return {};
 1305     if (data_size % bpv != 0) {
 1309     const size_t num = data_size / bpv;
// read_vector(): random access to a single vector within a page.
 1314     const uint8_t* page_data,
size_t page_size,
size_t index)
 1317     const size_t offset = index * bpv;
 1319     std::vector<float> result(params_.
dimension);
// Bounds check before touching page memory (offset + bpv must fit).
 1321     if (offset + bpv > page_size) {
 1326     dequantizer_.
dequantize(page_data + offset, result.data());
// read_raw(): same validation as read_page but returns quantized bytes
// without dequantizing; failures carry explicit error messages.
 1331     const uint8_t* data,
size_t data_size)
 1336             "quantized vector: bytes_per_vector is zero"};
 1339     if (data_size % bpv != 0) {
 1341         "quantized vector page size is not a multiple of bytes_per_vector"};
Dequantizes INT8/INT4 quantized vectors back to float32.
void dequantize(const uint8_t *input, float *output) const
Dequantize a single quantized vector to float32.
uint64_t anomaly_count() const
EU AI Act Art.12 anomaly tracking: number of dequantized values that fell outside the representable quantized range (such values are clamped on output).
Dequantizer(QuantizationParams params)
Construct a dequantizer with the given parameters.
std::vector< float > dequantize_flat(const uint8_t *input, size_t num_vectors) const
Flat batch dequantize: returns all floats in one contiguous buffer.
const QuantizationParams & params() const
Access the quantization parameters.
std::vector< std::vector< float > > dequantize_batch(const uint8_t *input, size_t num_vectors) const
Dequantize a batch of vectors, returning a vector-of-vectors.
Reads quantized page data (FIXED_LEN_BYTE_ARRAY) and dequantizes to float32 on demand.
expected< RawResult > read_raw(const uint8_t *data, size_t data_size)
Read raw quantized bytes without dequantization.
std::vector< float > read_vector(const uint8_t *page_data, size_t page_size, size_t index)
Read and dequantize a single vector by index within the page.
std::vector< std::vector< float > > read_page(const uint8_t *data, size_t data_size)
Read an entire page and dequantize all vectors to float32.
QuantizedVectorReader(QuantizationParams params)
Construct a reader with the given quantization parameters.
Accumulates float32 vectors, quantizes them, and produces FIXED_LEN_BYTE_ARRAY page data suitable for writing as a Parquet column.
size_t num_vectors() const
Number of vectors currently buffered.
std::vector< uint8_t > flush()
Flush accumulated data as FIXED_LEN_BYTE_ARRAY page bytes.
static ColumnDescriptor make_descriptor(const std::string &name, const QuantizationParams &params)
Create a ColumnDescriptor suitable for a quantized vector column.
QuantizedVectorWriter(QuantizationParams params)
Construct a writer with the given quantization parameters.
void add(const float *data)
Add a single float32 vector (quantized internally).
void add_batch(const float *data, size_t num_vectors)
Add a batch of float32 vectors (quantized internally).
void add_raw(const uint8_t *data)
Add pre-quantized raw bytes for one vector.
const QuantizationParams & params() const
Access the quantization parameters.
Quantizes float32 vectors to INT8 or INT4 representation.
std::vector< uint8_t > quantize_batch(const float *input, size_t num_vectors) const
Quantize a batch of vectors into a flat buffer of quantized bytes.
Quantizer(QuantizationParams params)
Construct a quantizer with the given parameters.
const QuantizationParams & params() const
Access the quantization parameters.
void quantize(const float *input, uint8_t *output) const
Quantize a single float32 vector into the output buffer.
A lightweight result type that holds either a success value of type T or an Error.
@ FIXED_LEN_BYTE_ARRAY
Fixed-length byte array (UUID, vectors, decimals).
@ FLOAT32_VECTOR
ML embedding vector — FIXED_LEN_BYTE_ARRAY(dim*4).
@ INVALID_FILE
The file is not a valid Parquet file (e.g. missing or wrong magic bytes).
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.
@ CORRUPT_PAGE
A data page failed integrity checks (bad CRC, truncated, or exceeds size limits).
QuantizationScheme
Identifies the quantization method used for vector compression.
@ SYMMETRIC_INT8
value = round(float / scale), range [-127, 127].
@ SYMMETRIC_INT4
value = round(float / scale), range [-7, 7], nibble-packed.
@ ASYMMETRIC_INT8
value = round((float - zero_point) / scale), range [0, 255].
@ REQUIRED
Exactly one value per row (non-nullable).
Descriptor for a single column in a Parquet schema.
int32_t type_length
Byte length for FIXED_LEN_BYTE_ARRAY columns (-1 = N/A).
LogicalType logical_type
Semantic annotation (STRING, TIMESTAMP_NS, etc.).
Repetition repetition
Nullability / cardinality.
std::string name
Column name (unique within a schema).
PhysicalType physical_type
On-disk storage type.
Lightweight error value carrying an ErrorCode and a human-readable message.
Parameters that fully describe a quantization mapping.
static QuantizationParams compute(const float *data, size_t num_vectors, uint32_t dim, QuantizationScheme scheme)
Compute optimal quantization parameters from a batch of vectors.
std::string serialize() const
Serialize to a compact key-value string for Parquet metadata.
float scale
Scale factor (float units per quantization step).
size_t bytes_per_vector() const
Storage size per vector in bytes.
static expected< QuantizationParams > deserialize(const std::string &s)
Deserialize from the key-value string produced by serialize().
QuantizationScheme scheme
Quantization scheme (symmetric/asymmetric, INT8/INT4).
uint32_t dimension
Vector dimension (number of float elements).
float zero_point
Offset (used by ASYMMETRIC_INT8 only).
Result of a raw (non-dequantized) page read.
size_t num_vectors
Number of vectors in the page.
const uint8_t * data
Pointer to quantized byte data (not owned).
Parquet format enumerations, type traits, and statistics structs.