SIGNET_FORGE/onnx__bridge_8hpp_source.html

// SPDX-License-Identifier: AGPL-3.0-or-later

// Copyright 2026 Johnson Ogundeji

#pragma once


#include "signet/ai/tensor_bridge.hpp"

#include "signet/error.hpp"


#include <cstdint>

#include <string>

#include <utility>

#include <vector>


namespace signet::forge {


/// ONNX tensor element data types, mirroring OrtTensorElementDataType.
///
/// Every enumerator carries an explicit numeric value and the underlying type
/// is fixed to int32_t. Note that the values 14 and 15 are not assigned to any
/// enumerator here.
enum class OnnxTensorType : int32_t {
    UNDEFINED = 0,   ///< No type (invalid / uninitialized)
    FLOAT = 1,       ///< 32-bit IEEE float (float32)
    UINT8 = 2,       ///< 8-bit unsigned integer
    INT8 = 3,        ///< 8-bit signed integer
    UINT16 = 4,      ///< 16-bit unsigned integer
    INT16 = 5,       ///< 16-bit signed integer
    INT32 = 6,       ///< 32-bit signed integer
    INT64 = 7,       ///< 64-bit signed integer
    STRING = 8,      ///< Variable-length string
    BOOL = 9,        ///< Boolean (1 byte)
    FLOAT16 = 10,    ///< 16-bit IEEE float (float16)
    DOUBLE = 11,     ///< 64-bit IEEE float (float64)
    UINT32 = 12,     ///< 32-bit unsigned integer
    UINT64 = 13,     ///< 64-bit unsigned integer
    BFLOAT16 = 16    ///< Brain floating-point (bfloat16)
};


/// Convert a Signet TensorDataType to the corresponding OnnxTensorType.
///
/// @param dtype  Element type of a Signet tensor.
/// @return The matching ONNX element type. A value that is not one of the
///         enumerators listed below (e.g. an out-of-range cast) yields
///         OnnxTensorType::UNDEFINED.
inline OnnxTensorType to_onnx_type(TensorDataType dtype) {
    // Start from the fallback so that the switch only has to record matches.
    OnnxTensorType mapped = OnnxTensorType::UNDEFINED;

    // No `default:` label: any value without a case keeps the fallback above.
    switch (dtype) {
        // Floating-point types
        case TensorDataType::FLOAT16: mapped = OnnxTensorType::FLOAT16; break;
        case TensorDataType::FLOAT32: mapped = OnnxTensorType::FLOAT;   break;
        case TensorDataType::FLOAT64: mapped = OnnxTensorType::DOUBLE;  break;

        // Integer types
        case TensorDataType::INT8:    mapped = OnnxTensorType::INT8;    break;
        case TensorDataType::UINT8:   mapped = OnnxTensorType::UINT8;   break;
        case TensorDataType::INT16:   mapped = OnnxTensorType::INT16;   break;
        case TensorDataType::INT32:   mapped = OnnxTensorType::INT32;   break;
        case TensorDataType::INT64:   mapped = OnnxTensorType::INT64;   break;

        // Boolean
        case TensorDataType::BOOL:    mapped = OnnxTensorType::BOOL;    break;
    }

    return mapped;
}


/// Convert an OnnxTensorType back to a Signet TensorDataType.
///
/// @param ort_type  ONNX element type to translate.
/// @return The equivalent TensorDataType, or an Error with
///         ErrorCode::UNSUPPORTED_TYPE when the ONNX type has no Signet
///         counterpart (STRING, UINT16, UINT32, UINT64, BFLOAT16, UNDEFINED,
///         or any value that is not a named enumerator).
inline expected<TensorDataType> from_onnx_type(OnnxTensorType ort_type) {
    switch (ort_type) {
        // Floating-point types
        case OnnxTensorType::FLOAT16: return TensorDataType::FLOAT16;
        case OnnxTensorType::FLOAT:   return TensorDataType::FLOAT32;
        case OnnxTensorType::DOUBLE:  return TensorDataType::FLOAT64;

        // Integer types
        case OnnxTensorType::INT8:    return TensorDataType::INT8;
        case OnnxTensorType::UINT8:   return TensorDataType::UINT8;
        case OnnxTensorType::INT16:   return TensorDataType::INT16;
        case OnnxTensorType::INT32:   return TensorDataType::INT32;
        case OnnxTensorType::INT64:   return TensorDataType::INT64;

        // Boolean
        case OnnxTensorType::BOOL:    return TensorDataType::BOOL;

        // ONNX types without a Signet equivalent leave the switch and share
        // the single error return below.
        case OnnxTensorType::UNDEFINED:
        case OnnxTensorType::STRING:
        case OnnxTensorType::UINT16:
        case OnnxTensorType::UINT32:
        case OnnxTensorType::UINT64:
        case OnnxTensorType::BFLOAT16:
        default:
            break;
    }

    return Error{ErrorCode::UNSUPPORTED_TYPE,
                 "OnnxTensorType has no Signet TensorDataType equivalent"};
}


struct OnnxTensorInfo {

    void*                  data         = nullptr;

    std::vector<int64_t>   shape;

    OnnxTensorType         element_type = OnnxTensorType::UNDEFINED;

    size_t                 byte_size    = 0;

    bool                   is_owner     = false;


    [[nodiscard]] bool is_valid() const {

        return data != nullptr

            && byte_size > 0

            && !shape.empty()

            && element_type != OnnxTensorType::UNDEFINED;

    }


};


/// Prepare a TensorView for ONNX Runtime consumption (zero-copy).
///
/// The returned info aliases the view's buffer: no data is copied, so the
/// memory behind `tensor` must stay alive for as long as the info is used.
///
/// Checks are performed in this order, and the first failure is reported:
///   1. the view is invalid            -> ErrorCode::INTERNAL_ERROR
///   2. the view is not contiguous     -> ErrorCode::UNSUPPORTED_TYPE
///   3. any dimension is zero/negative -> ErrorCode::INVALID_ARGUMENT
///   4. the dtype maps to UNDEFINED    -> ErrorCode::UNSUPPORTED_TYPE
///
/// NOTE(review): a view whose dims list is empty passes step 3 and produces an
/// info with an empty shape, for which OnnxTensorInfo::is_valid() reports
/// false. Confirm whether TensorView::is_valid() already rules that case out.
///
/// @param tensor  View to expose to ONNX Runtime.
/// @return A non-owning OnnxTensorInfo describing the view, or an Error.
inline expected<OnnxTensorInfo> prepare_for_onnx(const TensorView& tensor) {
    if (!tensor.is_valid()) {
        return Error{ErrorCode::INTERNAL_ERROR,
                     "cannot prepare invalid tensor for ONNX"};
    }

    if (!tensor.is_contiguous()) {
        return Error{ErrorCode::UNSUPPORTED_TYPE,
                     "non-contiguous tensors cannot be exported to ONNX; "
                     "call clone() first to produce a contiguous copy"};
    }

    // CWE-20: Improper Input Validation — every ONNX dimension must be >= 1.
    const auto& dims = tensor.shape().dims;
    for (auto it = dims.begin(); it != dims.end(); ++it) {
        if (*it < 1) {
            return Error{ErrorCode::INVALID_ARGUMENT,
                         "ONNX tensor dimensions must be positive"};
        }
    }

    // Resolve the element type up front; an unmapped dtype is rejected before
    // any part of the result is assembled.
    const OnnxTensorType onnx_type = to_onnx_type(tensor.dtype());
    if (onnx_type == OnnxTensorType::UNDEFINED) {
        return Error{ErrorCode::UNSUPPORTED_TYPE,
                     "tensor dtype maps to UNDEFINED ONNX type"};
    }

    OnnxTensorInfo info;
    info.element_type = onnx_type;
    info.shape        = dims;
    info.byte_size    = tensor.byte_size();

    // M28 WARNING: ONNX Runtime requires a non-const void*. The caller MUST
    // ensure the source tensor data is not backed by read-only memory (e.g.,
    // mmap PROT_READ). If the tensor originates from an mmap'd file, copy it
    // first via OwnedTensor.
    info.data = const_cast<void*>(tensor.data());

    // Zero-copy: the bridge allocated nothing, so the caller has nothing to free.
    info.is_owner = false;

    return info;
}


/// Prepare an OwnedTensor for ONNX Runtime consumption.
///
/// Takes a view of the tensor and forwards it to the TensorView overload, so
/// the same checks and error codes apply. The result is produced from that
/// view rather than from a copy of the tensor.
///
/// @param tensor  Owning tensor to expose to ONNX Runtime.
/// @return Whatever the TensorView overload returns for `tensor.view()`.
inline expected<OnnxTensorInfo> prepare_for_onnx(const OwnedTensor& tensor) {
    const auto borrowed = tensor.view();
    return prepare_for_onnx(borrowed);
}


struct OnnxInputSet {

    std::vector<std::string>     names;

    std::vector<OnnxTensorInfo>  tensors;


    [[nodiscard]] bool is_valid() const {

        if (names.empty() || names.size() != tensors.size()) return false;

        for (const auto& t : tensors) {

            if (!t.is_valid()) return false;

        }

        return true;

    }


};


/// Prepare a batch of named TensorViews for ONNX Runtime inference.
///
/// Each view is passed through prepare_for_onnx() in input order. The first
/// view that fails aborts the whole batch; its error code is kept and its
/// message is prefixed with the name of the offending input.
///
/// @param inputs  (name, view) pairs, in the order they should appear in the
///                resulting set.
/// @return An OnnxInputSet whose `names` and `tensors` are parallel to
///         `inputs`, or an Error. An empty `inputs` list is reported as
///         ErrorCode::INTERNAL_ERROR.
inline expected<OnnxInputSet> prepare_inputs_for_onnx(
    const std::vector<std::pair<std::string, TensorView>>& inputs)
{
    const auto count = inputs.size();
    if (count == 0) {
        return Error{ErrorCode::INTERNAL_ERROR,
                     "cannot prepare empty input set for ONNX"};
    }

    OnnxInputSet bundle;
    bundle.names.reserve(count);
    bundle.tensors.reserve(count);

    for (const auto& entry : inputs) {
        const std::string& input_name = entry.first;

        auto prepared = prepare_for_onnx(entry.second);
        if (!prepared) {
            // Keep the original error code, but say which input it was.
            const auto& cause = prepared.error();
            std::string detail = "failed to prepare ONNX input '";
            detail += input_name;
            detail += "': ";
            detail += cause.message;
            return Error{cause.code, std::move(detail)};
        }

        bundle.names.push_back(input_name);
        bundle.tensors.push_back(std::move(*prepared));
    }

    return bundle;
}


/// Return a human-readable string for an OnnxTensorType value.
///
/// @param t  ONNX element type to describe.
/// @return A string literal spelling the enumerator's name, or "UNKNOWN" when
///         `t` does not correspond to any named enumerator.
inline const char* onnx_type_name(OnnxTensorType t) {
    // Seed with the fallback; the switch overwrites it for known enumerators.
    const char* label = "UNKNOWN";

    switch (t) {
        case OnnxTensorType::UNDEFINED: label = "UNDEFINED"; break;
        case OnnxTensorType::FLOAT:     label = "FLOAT";     break;
        case OnnxTensorType::UINT8:     label = "UINT8";     break;
        case OnnxTensorType::INT8:      label = "INT8";      break;
        case OnnxTensorType::UINT16:    label = "UINT16";    break;
        case OnnxTensorType::INT16:     label = "INT16";     break;
        case OnnxTensorType::INT32:     label = "INT32";     break;
        case OnnxTensorType::INT64:     label = "INT64";     break;
        case OnnxTensorType::STRING:    label = "STRING";    break;
        case OnnxTensorType::BOOL:      label = "BOOL";      break;
        case OnnxTensorType::FLOAT16:   label = "FLOAT16";   break;
        case OnnxTensorType::DOUBLE:    label = "DOUBLE";    break;
        case OnnxTensorType::UINT32:    label = "UINT32";    break;
        case OnnxTensorType::UINT64:    label = "UINT64";    break;
        case OnnxTensorType::BFLOAT16:  label = "BFLOAT16";  break;
        default:                        break;
    }

    return label;
}


} // namespace signet::forge

signet::forge::OwnedTensor
An owning tensor that manages its own memory via a std::vector<uint8_t> buffer.
Definition tensor_bridge.hpp:531

signet::forge::OwnedTensor::view
TensorView view()
Get a mutable non-owning view.
Definition tensor_bridge.hpp:591

signet::forge::TensorView
A lightweight, non-owning view into a contiguous block of typed memory, interpreted as a multi-dimensional tensor.
Definition tensor_bridge.hpp:274

signet::forge::TensorView::is_valid
bool is_valid() const noexcept
True if the view points to valid data.
Definition tensor_bridge.hpp:464

signet::forge::TensorView::is_contiguous
bool is_contiguous() const noexcept
True if the data is densely packed (no stride gaps).
Definition tensor_bridge.hpp:459

signet::forge::TensorView::byte_size
size_t byte_size() const noexcept
Total byte size of the tensor data (num_elements * element_size).
Definition tensor_bridge.hpp:343

signet::forge::TensorView::shape
const TensorShape & shape() const noexcept
The shape of this tensor view.
Definition tensor_bridge.hpp:327

signet::forge::TensorView::dtype
TensorDataType dtype() const noexcept
The element data type.
Definition tensor_bridge.hpp:329

signet::forge::TensorView::data
void * data() noexcept
Raw mutable pointer to the underlying data buffer.
Definition tensor_bridge.hpp:305

signet::forge::expected
A lightweight result type that holds either a success value of type T or an Error.
Definition error.hpp:143

error.hpp

signet::forge
Definition audit_chain.hpp:74

signet::forge::prepare_for_onnx
expected< OnnxTensorInfo > prepare_for_onnx(const TensorView &tensor)
Prepare a TensorView for ONNX Runtime consumption (zero-copy).
Definition onnx_bridge.hpp:176

signet::forge::onnx_type_name
const char * onnx_type_name(OnnxTensorType t)
Return a human-readable string for an OnnxTensorType value.
Definition onnx_bridge.hpp:298

signet::forge::OnnxTensorType
OnnxTensorType
ONNX tensor element data types, mirroring OrtTensorElementDataType.
Definition onnx_bridge.hpp:39

signet::forge::OnnxTensorType::UNDEFINED
@ UNDEFINED
No type (invalid / uninitialized)

signet::forge::OnnxTensorType::UINT32
@ UINT32
32-bit unsigned integer

signet::forge::OnnxTensorType::UINT16
@ UINT16
16-bit unsigned integer

signet::forge::OnnxTensorType::INT64
@ INT64
64-bit signed integer

signet::forge::OnnxTensorType::INT16
@ INT16
16-bit signed integer

signet::forge::OnnxTensorType::STRING
@ STRING
Variable-length string.

signet::forge::OnnxTensorType::INT32
@ INT32
32-bit signed integer

signet::forge::OnnxTensorType::UINT64
@ UINT64
64-bit unsigned integer

signet::forge::OnnxTensorType::BFLOAT16
@ BFLOAT16
Brain floating-point (bfloat16)

signet::forge::OnnxTensorType::BOOL
@ BOOL
Boolean (1 byte)

signet::forge::OnnxTensorType::FLOAT16
@ FLOAT16
16-bit IEEE float (float16)

signet::forge::OnnxTensorType::FLOAT
@ FLOAT
32-bit IEEE float (float32)

signet::forge::OnnxTensorType::UINT8
@ UINT8
8-bit unsigned integer

signet::forge::OnnxTensorType::INT8
@ INT8
8-bit signed integer

signet::forge::OnnxTensorType::DOUBLE
@ DOUBLE
64-bit IEEE float (float64)

signet::forge::to_onnx_type
OnnxTensorType to_onnx_type(TensorDataType dtype)
Convert a Signet TensorDataType to the corresponding OnnxTensorType.
Definition onnx_bridge.hpp:68

signet::forge::from_onnx_type
expected< TensorDataType > from_onnx_type(OnnxTensorType ort_type)
Convert an OnnxTensorType back to a Signet TensorDataType.
Definition onnx_bridge.hpp:90

signet::forge::ErrorCode::UNSUPPORTED_TYPE
@ UNSUPPORTED_TYPE
The file contains a Parquet physical or logical type that is not implemented.

signet::forge::ErrorCode::INTERNAL_ERROR
@ INTERNAL_ERROR
An unexpected internal error that does not fit any other category.

signet::forge::ErrorCode::INVALID_ARGUMENT
@ INVALID_ARGUMENT
A caller-supplied argument is outside the valid range or violates a precondition.

signet::forge::prepare_inputs_for_onnx
expected< OnnxInputSet > prepare_inputs_for_onnx(const std::vector< std::pair< std::string, TensorView > > &inputs)
Prepare a batch of named TensorViews for ONNX Runtime inference.
Definition onnx_bridge.hpp:264

signet::forge::TensorDataType
TensorDataType
Element data type for tensor storage, mapping to ONNX/PyTorch/TF type enums.
Definition tensor_bridge.hpp:148

signet::forge::TensorDataType::FLOAT64
@ FLOAT64
IEEE 754 double-precision (8 bytes)

signet::forge::TensorDataType::INT64
@ INT64
Signed 64-bit integer.

signet::forge::TensorDataType::INT16
@ INT16
Signed 16-bit integer.

signet::forge::TensorDataType::INT32
@ INT32
Signed 32-bit integer.

signet::forge::TensorDataType::FLOAT32
@ FLOAT32
IEEE 754 single-precision (4 bytes)

signet::forge::TensorDataType::BOOL
@ BOOL
Boolean (1 byte)

signet::forge::TensorDataType::FLOAT16
@ FLOAT16
IEEE 754 half-precision (2 bytes)

signet::forge::TensorDataType::UINT8
@ UINT8
Unsigned 8-bit integer.

signet::forge::TensorDataType::INT8
@ INT8
Signed 8-bit integer.

signet::forge::Error
Lightweight error value carrying an ErrorCode and a human-readable message.
Definition error.hpp:99

signet::forge::Error::code
ErrorCode code
The machine-readable error category.
Definition error.hpp:101

signet::forge::OnnxInputSet
A set of named ONNX tensors for multi-input model inference.
Definition onnx_bridge.hpp:235

signet::forge::OnnxInputSet::tensors
std::vector< OnnxTensorInfo > tensors
Prepared tensor infos (parallel with names)
Definition onnx_bridge.hpp:237

signet::forge::OnnxInputSet::is_valid
bool is_valid() const
Check whether all tensors are valid and the set is non-empty.
Definition onnx_bridge.hpp:243

signet::forge::OnnxInputSet::names
std::vector< std::string > names
Model input names (parallel with tensors)
Definition onnx_bridge.hpp:236

signet::forge::OnnxTensorInfo
Contains all information needed to create an OrtValue externally.
Definition onnx_bridge.hpp:138

signet::forge::OnnxTensorInfo::is_valid
bool is_valid() const
Check whether this info is ready to be used with OrtApi::CreateTensorWithDataAsOrtValue.
Definition onnx_bridge.hpp:150

signet::forge::OnnxTensorInfo::element_type
OnnxTensorType element_type
ONNX element data type.
Definition onnx_bridge.hpp:141

signet::forge::OnnxTensorInfo::byte_size
size_t byte_size
Total data size in bytes (product of shape * element size)
Definition onnx_bridge.hpp:142

signet::forge::OnnxTensorInfo::is_owner
bool is_owner
If true, the data was allocated by the bridge and the caller must free it.
Definition onnx_bridge.hpp:143

signet::forge::OnnxTensorInfo::data
void * data
Pointer to contiguous tensor data (non-owning unless is_owner)
Definition onnx_bridge.hpp:139

signet::forge::OnnxTensorInfo::shape
std::vector< int64_t > shape
ONNX shape dimensions (e.g. {batch, features})
Definition onnx_bridge.hpp:140

signet::forge::TensorShape::dims
std::vector< int64_t > dims
Dimension sizes (e.g. {32, 768} for a 32x768 matrix)
Definition tensor_bridge.hpp:208

tensor_bridge.hpp
Zero-copy tensor bridge: maps Parquet column data directly into ML-framework-compatible tensor views.