Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
numpy_bridge.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
19
21#include "signet/error.hpp"
22
23#include <cstdint>
24#include <cstdlib>
25#include <cstring>
26#include <string>
27#include <utility>
28#include <vector>
29
30namespace signet::forge {
31
35
/// @brief DLPack device type, matching DLDeviceType from dlpack.h.
///
/// Values mirror the DLPack ABI so they can be exchanged with other
/// DLPack implementations without translation.
enum class DLDeviceType : int32_t {
    kDLCPU = 1,       ///< System main memory
    kDLCUDA = 2,      ///< NVIDIA CUDA GPU memory
    kDLCUDAHost = 3,  ///< CUDA pinned host memory
    kDLVulkan = 7,    ///< Vulkan GPU memory
    kDLMetal = 8,     ///< Apple Metal GPU memory
    kDLROCM = 10      ///< AMD ROCm GPU memory
};

/// @brief DLPack data type code, matching DLDataTypeCode from dlpack.h.
enum class DLDataTypeCode : uint8_t {
    kDLInt = 0,    ///< Signed integer
    kDLUInt = 1,   ///< Unsigned integer
    kDLFloat = 2,  ///< IEEE floating point
    kDLBfloat = 4  ///< Brain floating point (bfloat16)
};

/// @brief DLPack data type descriptor.
///
/// NOTE(review): the `code` member was absent from the damaged listing but
/// is read by from_dlpack_dtype() and required by the DLPack ABI; restored
/// in dlpack.h field order (code, bits, lanes).
struct DLDataType {
    DLDataTypeCode code;  ///< Type category (int/uint/float/bfloat)
    uint8_t bits;         ///< Number of bits per element (e.g. 32 for float32)
    uint16_t lanes = 1;   ///< Number of SIMD lanes (1 for scalar)
};

/// @brief DLPack device descriptor (type + ordinal).
struct DLDevice {
    DLDeviceType device_type;  ///< Device type (CPU, CUDA, etc.)
    int32_t device_id;         ///< Device ordinal (0 for single-device systems)
};

/// @brief DLPack tensor descriptor (non-owning), matching DLTensor from dlpack.h.
struct DLTensor {
    void* data;            ///< Pointer to the start of tensor data
    DLDevice device;       ///< Device where data resides (CPU, CUDA, etc.)
    int32_t ndim;          ///< Number of dimensions (must be > 0)
    DLDataType dtype;      ///< Element data type descriptor
    int64_t* shape;        ///< Shape array with ndim elements
    int64_t* strides;      ///< Stride array in elements (nullptr = C-contiguous)
    uint64_t byte_offset;  ///< Byte offset from data pointer to first element
};

/// @brief DLPack managed tensor — the exchange object for from_dlpack().
///
/// The producer fills every field; the consumer must invoke deleter exactly
/// once when the tensor is no longer needed.
struct DLManagedTensor {
    DLTensor dl_tensor;                 ///< The tensor descriptor (layout, data pointer, dtype)
    void* manager_ctx;                  ///< Opaque context for the deleter (owns shape/data)
    void (*deleter)(DLManagedTensor*);  ///< Destructor callback (must be called exactly once)
};
136 DLDataType dt;
137 dt.lanes = 1;
138
139 switch (dtype) {
142 dt.bits = 32;
143 break;
146 dt.bits = 64;
147 break;
150 dt.bits = 16;
151 break;
154 dt.bits = 32;
155 break;
158 dt.bits = 64;
159 break;
162 dt.bits = 16;
163 break;
166 dt.bits = 8;
167 break;
170 dt.bits = 8;
171 break;
173 // DLPack convention: booleans are represented as uint8
175 dt.bits = 8;
176 break;
177 }
178
179 return dt;
180}
181
192 if (dl_dtype.lanes != 1) {
194 "multi-lane DLPack dtypes are not supported"};
195 }
196
197 switch (dl_dtype.code) {
199 switch (dl_dtype.bits) {
200 case 16: return TensorDataType::FLOAT16;
201 case 32: return TensorDataType::FLOAT32;
202 case 64: return TensorDataType::FLOAT64;
203 default:
205 "unsupported DLPack float bit width: "
206 + std::to_string(dl_dtype.bits)};
207 }
208
210 switch (dl_dtype.bits) {
211 case 8: return TensorDataType::INT8;
212 case 16: return TensorDataType::INT16;
213 case 32: return TensorDataType::INT32;
214 case 64: return TensorDataType::INT64;
215 default:
217 "unsupported DLPack int bit width: "
218 + std::to_string(dl_dtype.bits)};
219 }
220
222 switch (dl_dtype.bits) {
223 case 8: return TensorDataType::UINT8;
224 default:
226 "unsupported DLPack uint bit width: "
227 + std::to_string(dl_dtype.bits)
228 + " (only uint8 is supported)"};
229 }
230
233 "bfloat16 is not supported by Signet TensorDataType"};
234
235 default:
237 "unknown DLPack data type code"};
238 }
239}
240
242
245namespace detail {
246
256 std::vector<int64_t> shape;
257 // strides left empty for contiguous tensors (dl_tensor.strides = nullptr)
258};
259
267 std::vector<int64_t> shape;
268};
269
277 if (self == nullptr) return;
278 if (self->manager_ctx != nullptr) {
279 delete static_cast<DLPackViewCtx*>(self->manager_ctx);
280 self->manager_ctx = nullptr;
281 }
282 delete self;
283}
284
292 if (self == nullptr) return;
293 if (self->manager_ctx != nullptr) {
294 delete static_cast<DLPackOwnedCtx*>(self->manager_ctx);
295 self->manager_ctx = nullptr;
296 }
297 delete self;
298}
299
300} // namespace detail
302
330public:
346 if (!tensor.is_valid()) {
348 "cannot export invalid tensor to DLPack"};
349 }
350
351 if (!tensor.is_contiguous()) {
353 "non-contiguous tensor cannot be exported to DLPack; "
354 "call clone() first"};
355 }
356
357 // Build the manager context (owns shape array, NOT the data)
358 auto* ctx = new detail::DLPackViewCtx();
359 ctx->shape = tensor.shape().dims;
360
361 // Build the DLManagedTensor
362 auto* managed = new DLManagedTensor();
363 managed->manager_ctx = ctx;
364 managed->deleter = detail::dlpack_view_deleter;
365
366 DLTensor& dl = managed->dl_tensor;
367 dl.data = const_cast<void*>(tensor.data());
369 dl.ndim = static_cast<int32_t>(tensor.shape().ndim());
370 dl.dtype = to_dlpack_dtype(tensor.dtype());
371 dl.shape = ctx->shape.data();
372 dl.strides = nullptr; // contiguous C-order
373 dl.byte_offset = 0;
374
375 return managed;
376 }
377
392 TensorView view = tensor.view();
393
394 if (!view.is_valid()) {
396 "cannot export invalid OwnedTensor to DLPack"};
397 }
398
399 if (!view.is_contiguous()) {
401 "non-contiguous OwnedTensor cannot be exported to DLPack"};
402 }
403
404 // Build the manager context (owns the tensor data + shape array)
405 auto* ctx = new detail::DLPackOwnedCtx();
406 ctx->owned_tensor = std::move(tensor);
407 ctx->shape = view.shape().dims;
408
409 // Build the DLManagedTensor
410 auto* managed = new DLManagedTensor();
411 managed->manager_ctx = ctx;
412 managed->deleter = detail::dlpack_owned_deleter;
413
414 DLTensor& dl = managed->dl_tensor;
415 dl.data = const_cast<void*>(view.data());
417 dl.ndim = static_cast<int32_t>(view.shape().ndim());
418 dl.dtype = to_dlpack_dtype(view.dtype());
419 dl.shape = ctx->shape.data();
420 dl.strides = nullptr; // contiguous C-order
421 dl.byte_offset = 0;
422
423 return managed;
424 }
425
443 static inline expected<TensorView> import_tensor(const DLManagedTensor* managed) {
444 if (managed == nullptr) {
446 "null DLManagedTensor pointer"};
447 }
448
449 const DLTensor& dl = managed->dl_tensor;
450
451 // Only CPU tensors are supported
455 "only CPU/CUDAHost DLPack tensors can be imported"};
456 }
457
458 if (dl.data == nullptr) {
460 "DLTensor data pointer is null"};
461 }
462
463 if (dl.ndim <= 0) {
465 "DLTensor has zero or negative ndim"};
466 }
467
468 if (dl.shape == nullptr) {
470 "DLTensor shape pointer is null"};
471 }
472
473 // Reject strided (non-contiguous) tensors for TensorView import.
474 // TensorView assumes C-contiguous layout.
475 if (dl.strides != nullptr) {
476 // Verify strides match C-contiguous layout
477 int64_t expected_stride = 1;
478 for (int32_t d = dl.ndim - 1; d >= 0; --d) {
479 if (dl.strides[d] != expected_stride) {
481 "strided (non-contiguous) DLPack tensor cannot "
482 "be imported as TensorView; use import_tensor_copy()"};
483 }
484 expected_stride *= dl.shape[d];
485 }
486 }
487
488 auto dtype_result = from_dlpack_dtype(dl.dtype);
489 if (!dtype_result) {
490 return dtype_result.error();
491 }
492
493 // CWE-20: Improper Input Validation (DLPack §3.2 max_ndim)
494 // L14: Reject unreasonable ndim values (DLPack spec uses int32_t)
495 if (dl.ndim > 32) {
497 "DLTensor ndim exceeds reasonable limit (32)"};
498 }
499
500 TensorShape shape;
501 shape.dims.reserve(static_cast<size_t>(dl.ndim));
502
503 // CWE-190: Integer Overflow — validate byte_offset is within tensor data bounds
504 const size_t elem_size = tensor_element_size(*dtype_result);
505 size_t total_elements = 1;
506 for (int32_t d = 0; d < dl.ndim; ++d) {
507 if (dl.shape[d] <= 0) {
509 "DLTensor shape dimension must be positive"};
510 }
511 const size_t dim_val = static_cast<size_t>(dl.shape[d]);
512 if (total_elements > SIZE_MAX / dim_val) {
514 "DLTensor shape product overflows size_t"};
515 }
516 total_elements *= dim_val;
517 shape.dims.push_back(dl.shape[d]);
518 }
519 // CWE-190: Integer Overflow — num_elements*elem_size overflow check
520 if (elem_size > 0 && total_elements > SIZE_MAX / elem_size) {
522 "DLTensor total byte size overflows size_t"};
523 }
524 const size_t total_size = total_elements * elem_size;
525 if (dl.byte_offset > total_size) {
527 "DLPack byte_offset out of range"};
528 }
529
530 // Apply byte_offset
531 void* data_ptr = static_cast<uint8_t*>(dl.data) + dl.byte_offset;
532
533 return TensorView(data_ptr, shape, *dtype_result);
534 }
535
555 if (managed == nullptr) {
557 "null DLManagedTensor pointer"};
558 }
559
560 const DLTensor& dl = managed->dl_tensor;
561
565 "only CPU/CUDAHost DLPack tensors can be imported"};
566 }
567
568 if (dl.data == nullptr || dl.ndim <= 0 || dl.shape == nullptr) {
570 "invalid DLTensor (null data/shape or non-positive ndim)"};
571 }
572
573 auto dtype_result = from_dlpack_dtype(dl.dtype);
574 if (!dtype_result) {
575 return dtype_result.error();
576 }
577
578 TensorDataType dtype = *dtype_result;
579 const size_t elem_size = tensor_element_size(dtype);
580
581 TensorShape shape;
582 shape.dims.assign(dl.shape, dl.shape + dl.ndim);
583 const size_t num_elements = shape.num_elements();
584
585 // CWE-190: Integer Overflow — check for multiplication overflow before allocating
586 if (elem_size != 0 && num_elements > SIZE_MAX / elem_size) {
588 "DLPack tensor size overflow (num_elements * elem_size)"};
589 }
590
591 // Source data pointer with byte offset applied
592 const uint8_t* src_base = static_cast<const uint8_t*>(dl.data)
593 + dl.byte_offset;
594
595 // Check if contiguous -- if so, fast memcpy path
596 bool is_contiguous = (dl.strides == nullptr);
597 if (!is_contiguous && dl.strides != nullptr) {
598 // Check if strides match C-contiguous
599 int64_t expected_stride = 1;
600 is_contiguous = true;
601 for (int32_t d = dl.ndim - 1; d >= 0; --d) {
602 if (dl.strides[d] != expected_stride) {
603 is_contiguous = false;
604 break;
605 }
606 expected_stride *= dl.shape[d];
607 }
608 }
609
610 if (is_contiguous) {
611 // Fast path: contiguous data, direct memcpy
612 OwnedTensor result(shape, dtype);
613 std::memcpy(result.data(), src_base, num_elements * elem_size);
614 return result;
615 }
616
617 // Slow path: strided data, element-by-element copy.
618 // Walk the multi-dimensional index space and compute source offsets
619 // from strides.
620 OwnedTensor result(shape, dtype);
621 uint8_t* dst = static_cast<uint8_t*>(result.data());
622
623 // Multi-index iteration
624 std::vector<int64_t> idx(static_cast<size_t>(dl.ndim), 0);
625 for (size_t flat = 0; flat < num_elements; ++flat) {
626 // Compute source byte offset from strides
627 int64_t src_elem_offset = 0;
628 for (int32_t d = 0; d < dl.ndim; ++d) {
629 src_elem_offset += idx[static_cast<size_t>(d)]
630 * dl.strides[static_cast<size_t>(d)];
631 }
632
633 const uint8_t* src_elem = src_base
634 + static_cast<size_t>(src_elem_offset) * elem_size;
635 std::memcpy(dst + flat * elem_size, src_elem, elem_size);
636
637 // Increment multi-index (row-major / C-order)
638 for (int32_t d = dl.ndim - 1; d >= 0; --d) {
639 auto ud = static_cast<size_t>(d);
640 idx[ud]++;
641 if (idx[ud] < dl.shape[d]) break;
642 idx[ud] = 0;
643 }
644 }
645
646 return result;
647 }
648};
649
/// @brief Simple C-contiguous buffer descriptor for Python interop (PEP 3118).
///
/// Mirrors the fields a Py_buffer exporter needs; does not own `data`.
struct BufferInfo {
    void* data;                    ///< Pointer to contiguous data (non-owning)
    size_t itemsize;               ///< Bytes per element (e.g. 4 for float32)
    std::string format;            ///< Python struct format character (e.g. "f", "d")
    int64_t ndim;                  ///< Number of dimensions
    std::vector<int64_t> shape;    ///< Shape in each dimension (ndim elements)
    std::vector<int64_t> strides;  ///< Stride in bytes for each dimension (ndim elements)
};
670
673namespace detail {
674
681 switch (dtype) {
682 case TensorDataType::FLOAT32: return "f";
683 case TensorDataType::FLOAT64: return "d";
684 case TensorDataType::INT32: return "i";
685 case TensorDataType::INT64: return "l";
686 case TensorDataType::INT8: return "b";
687 case TensorDataType::UINT8: return "B";
688 case TensorDataType::INT16: return "h";
689 case TensorDataType::FLOAT16: return "e";
690 case TensorDataType::BOOL: return "?";
691 }
692 return nullptr; // unreachable
693}
694
695} // namespace detail
697
721 if (!tensor.is_valid()) {
723 "cannot create BufferInfo from invalid tensor"};
724 }
725
726 if (!tensor.is_contiguous()) {
728 "non-contiguous tensor cannot be described by BufferInfo; "
729 "call clone() first"};
730 }
731
732 const char* fmt = detail::tensor_dtype_to_pybuf_format(tensor.dtype());
733 if (fmt == nullptr) {
735 "tensor dtype has no Python buffer format mapping"};
736 }
737
738 const size_t elem_size = tensor.element_size();
739 const auto& dims = tensor.shape().dims;
740 const int64_t ndim = static_cast<int64_t>(dims.size());
741
742 // Compute C-contiguous strides (in bytes) from innermost to outermost
743 std::vector<int64_t> strides(static_cast<size_t>(ndim));
744 if (ndim > 0) {
745 strides[static_cast<size_t>(ndim - 1)] = static_cast<int64_t>(elem_size);
746 for (int64_t d = ndim - 2; d >= 0; --d) {
747 auto ud = static_cast<size_t>(d);
748 auto ud1 = static_cast<size_t>(d + 1);
749 strides[ud] = strides[ud1] * dims[ud1];
750 }
751 }
752
753 BufferInfo info;
754 info.data = const_cast<void*>(tensor.data());
755 info.itemsize = elem_size;
756 info.format = fmt;
757 info.ndim = ndim;
758 info.shape = dims;
759 info.strides = std::move(strides);
760
761 return info;
762}
763
764} // namespace signet::forge
Exports and imports Signet tensors via DLPack, enabling zero-copy interoperability with PyTorch, NumPy, and other DLPack-compatible frameworks.
static expected< DLManagedTensor * > export_owned_tensor(OwnedTensor &&tensor)
Export an OwnedTensor as a DLManagedTensor (zero-copy ownership transfer).
static expected< OwnedTensor > import_tensor_copy(const DLManagedTensor *managed)
Import a DLManagedTensor as an OwnedTensor (deep copy).
static expected< DLManagedTensor * > export_tensor(const TensorView &tensor)
Export a TensorView as a DLManagedTensor (zero-copy, non-owning).
static expected< TensorView > import_tensor(const DLManagedTensor *managed)
Import a DLManagedTensor as a TensorView (zero-copy).
An owning tensor that manages its own memory via a std::vector<uint8_t> buffer.
void * data() noexcept
Raw mutable pointer to the tensor buffer.
A lightweight, non-owning view into a contiguous block of typed memory, interpreted as a multi-dimensional array.
bool is_valid() const noexcept
True if the view points to valid data.
bool is_contiguous() const noexcept
True if the data is densely packed (no stride gaps).
const TensorShape & shape() const noexcept
The shape of this tensor view.
TensorDataType dtype() const noexcept
The element data type.
size_t element_size() const noexcept
Bytes per element.
void * data() noexcept
Raw mutable pointer to the underlying data buffer.
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:145
const char * tensor_dtype_to_pybuf_format(TensorDataType dtype)
Map TensorDataType to a Python struct format character (PEP 3118).
void dlpack_owned_deleter(DLManagedTensor *self)
Deleter for a DLManagedTensor created from an OwnedTensor (owning).
void dlpack_view_deleter(DLManagedTensor *self)
Deleter for a DLManagedTensor created from a TensorView (non-owning).
expected< BufferInfo > to_buffer_info(const TensorView &tensor)
Create a BufferInfo from a TensorView for Python buffer protocol export.
DLDataType to_dlpack_dtype(TensorDataType dtype)
Convert a Signet TensorDataType to a DLPack DLDataType.
expected< TensorDataType > from_dlpack_dtype(DLDataType dl_dtype)
Convert a DLPack DLDataType back to a Signet TensorDataType.
DLDeviceType
DLPack device type, matching DLDeviceType from dlpack.h.
@ kDLVulkan
Vulkan GPU memory.
@ kDLCUDAHost
CUDA pinned host memory.
@ kDLCPU
System main memory.
@ kDLROCM
AMD ROCm GPU memory.
@ kDLMetal
Apple Metal GPU memory.
@ kDLCUDA
NVIDIA CUDA GPU memory.
@ UNSUPPORTED_TYPE
The file contains a Parquet physical or logical type that is not implemented.
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.
TensorDataType
Element data type for tensor storage, mapping to ONNX/PyTorch/TF type enums.
@ FLOAT64
IEEE 754 double-precision (8 bytes)
@ INT64
Signed 64-bit integer.
@ INT16
Signed 16-bit integer.
@ INT32
Signed 32-bit integer.
@ FLOAT32
IEEE 754 single-precision (4 bytes)
@ FLOAT16
IEEE 754 half-precision (2 bytes)
@ UINT8
Unsigned 8-bit integer.
@ INT8
Signed 8-bit integer.
constexpr size_t tensor_element_size(TensorDataType dtype) noexcept
Returns the byte size of a single element of the given tensor data type.
DLDataTypeCode
DLPack data type code, matching DLDataTypeCode from dlpack.h.
@ kDLBfloat
Brain floating point (bfloat16)
@ kDLFloat
IEEE floating point.
@ kDLUInt
Unsigned integer.
Simple C-contiguous buffer descriptor for Python interop.
std::string format
Python struct format character (e.g. "f", "d")
void * data
Pointer to contiguous data (non-owning)
size_t itemsize
Bytes per element (e.g. 4 for float32)
int64_t ndim
Number of dimensions.
std::vector< int64_t > strides
Stride in bytes for each dimension (ndim elements)
std::vector< int64_t > shape
Shape in each dimension (ndim elements)
DLPack data type descriptor.
DLDataTypeCode code
Type category (int/uint/float/bfloat)
uint16_t lanes
Number of SIMD lanes (1 for scalar)
uint8_t bits
Number of bits per element (e.g. 32 for float32)
DLPack device descriptor (type + ordinal).
int32_t device_id
Device ordinal (0 for single-device systems)
DLDeviceType device_type
Device type (CPU, CUDA, etc.)
DLPack managed tensor – the exchange object for from_dlpack().
DLTensor dl_tensor
The tensor descriptor (layout, data pointer, dtype)
void * manager_ctx
Opaque context for the deleter (owns shape/data)
void(* deleter)(DLManagedTensor *)
Destructor callback (must be called exactly once)
DLPack tensor descriptor (non-owning).
int64_t * shape
Shape array with ndim elements.
void * data
Pointer to the start of tensor data.
int32_t ndim
Number of dimensions (must be > 0)
int64_t * strides
Stride array in elements (nullptr = C-contiguous)
DLDataType dtype
Element data type descriptor.
uint64_t byte_offset
Byte offset from data pointer to first element.
DLDevice device
Device where data resides (CPU, CUDA, etc.)
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:101
Describes the shape of a tensor as a vector of dimension sizes.
int64_t num_elements() const noexcept
Total number of elements (product of all dimensions).
size_t ndim() const noexcept
Number of dimensions.
std::vector< int64_t > dims
Dimension sizes (e.g. {32, 768} for a 32x768 matrix)
Context stored in DLManagedTensor.manager_ctx for owning exports.
std::vector< int64_t > shape
DLTensor.shape points here.
OwnedTensor owned_tensor
Keeps data alive.
Context stored in DLManagedTensor.manager_ctx for non-owning exports.
std::vector< int64_t > shape
Shape array; DLTensor.shape points here.
Zero-copy tensor bridge: maps Parquet column data directly into ML-framework-compatible tensor views.