Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
signet::forge::detail Namespace Reference

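Internal implementation details shared across the library (dictionary encoding, WAL helpers, Arrow/DLPack bridges, tensor pointer alignment, and CSV parsing). More...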

Namespaces

namespace  audit
 
namespace  snappy
 
namespace  writer
 

Classes

class  AlignedAllocator
 
struct  ArrowArrayPrivate
 Heap-allocated context attached to ArrowArray.private_data. More...
 
struct  ArrowSchemaPrivate
 Heap-allocated context attached to ArrowSchema.private_data. More...
 
struct  DLPackOwnedCtx
 Context stored in DLManagedTensor.manager_ctx for owning exports. More...
 
struct  DLPackViewCtx
 Context stored in DLManagedTensor.manager_ctx for non-owning exports. More...
 

Functions

template<typename T >
bool is_pointer_aligned (const void *ptr) noexcept
 
template<typename T >
T * aligned_ptr (void *ptr) noexcept
 
template<typename T >
const T * aligned_ptr (const void *ptr) noexcept
 
template<typename T >
T * aligned_ptr_at (void *base, std::size_t offset) noexcept
 
template<typename T >
const T * aligned_ptr_at (const void *base, std::size_t offset) noexcept
 
uint32_t crc32 (const void *data, size_t length) noexcept
 Compute CRC-32 over a contiguous byte buffer (polynomial 0xEDB88320).
 
uint32_t crc32_combine (uint32_t crc_a, const void *data_b, size_t len_b) noexcept
 Combine two CRC regions without concatenating buffers.
 
int64_t now_ns () noexcept
 Return nanoseconds since Unix epoch (cross-platform).
 
void write_le32 (uint8_t *dst, uint32_t v) noexcept
 Write a 32-bit unsigned integer in little-endian byte order.
 
void write_le64 (uint8_t *dst, uint64_t v) noexcept
 Write a 64-bit unsigned integer in little-endian byte order.
 
uint32_t read_le32 (const uint8_t *src) noexcept
 Read a 32-bit unsigned integer from little-endian byte order.
 
uint64_t read_le64 (const uint8_t *src) noexcept
 Read a 64-bit unsigned integer from little-endian byte order.
 
int full_fsync (int fd) noexcept
 Force durable flush to storage media.
 
int dict_bit_width (size_t dict_size)
 Compute the minimum bit width needed to represent dictionary indices.
 
void plain_encode_value (std::vector< uint8_t > &buf, const std::string &val)
 Append a string value in PLAIN BYTE_ARRAY format (4-byte LE length prefix + raw bytes).
 
void plain_encode_value (std::vector< uint8_t > &buf, int32_t val)
 Append an int32_t in PLAIN format (4-byte little-endian).
 
void plain_encode_value (std::vector< uint8_t > &buf, int64_t val)
 Append an int64_t in PLAIN format (8-byte little-endian).
 
void plain_encode_value (std::vector< uint8_t > &buf, float val)
 Append a float in PLAIN format (4-byte little-endian, IEEE 754).
 
void plain_encode_value (std::vector< uint8_t > &buf, double val)
 Append a double in PLAIN format (8-byte little-endian, IEEE 754).
 
std::string plain_decode_value (const uint8_t *data, size_t &pos, size_t size, std::string *)
 Decode a string from PLAIN BYTE_ARRAY format at data[pos].
 
int32_t plain_decode_value (const uint8_t *data, size_t &pos, size_t size, int32_t *)
 Decode an int32_t from PLAIN format at data[pos].
 
int64_t plain_decode_value (const uint8_t *data, size_t &pos, size_t size, int64_t *)
 Decode an int64_t from PLAIN format at data[pos].
 
float plain_decode_value (const uint8_t *data, size_t &pos, size_t size, float *)
 Decode a float from PLAIN format at data[pos].
 
double plain_decode_value (const uint8_t *data, size_t &pos, size_t size, double *)
 Decode a double from PLAIN format at data[pos].
 
void release_arrow_schema (ArrowSchema *schema)
 Release callback for ArrowSchema.
 
void release_arrow_array (ArrowArray *array)
 Release callback for ArrowArray.
 
void dlpack_view_deleter (DLManagedTensor *self)
 Deleter for a DLManagedTensor created from a TensorView (non-owning).
 
void dlpack_owned_deleter (DLManagedTensor *self)
 Deleter for a DLManagedTensor created from an OwnedTensor (owning).
 
const char * tensor_dtype_to_pybuf_format (TensorDataType dtype)
 Map TensorDataType to a Python struct format character (PEP 3118).
 
double parse_double (std::string_view sv) noexcept
 
float parse_float (std::string_view sv) noexcept
 
bool try_parse_double (std::string_view sv, double &out) noexcept
 Try parsing a string_view as double; returns true on full parse success.
 

Detailed Description

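Internal implementation details shared across the library: dictionary encoding (dictionary.hpp), WAL helpers (wal.hpp), Arrow and DLPack bridges (arrow_bridge.hpp, numpy_bridge.hpp), tensor pointer alignment (tensor_bridge.hpp), and CSV parsing (writer.hpp).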

Function Documentation

◆ aligned_ptr() [1/2]

template<typename T >
const T * signet::forge::detail::aligned_ptr ( const void *  ptr)
inline noexcept

Definition at line 125 of file tensor_bridge.hpp.

◆ aligned_ptr() [2/2]

template<typename T >
T * signet::forge::detail::aligned_ptr ( void *  ptr)
inlinenoexcept

Definition at line 120 of file tensor_bridge.hpp.

◆ aligned_ptr_at() [1/2]

template<typename T >
const T * signet::forge::detail::aligned_ptr_at ( const void *  base,
std::size_t  offset 
)
inline noexcept

Definition at line 136 of file tensor_bridge.hpp.

◆ aligned_ptr_at() [2/2]

template<typename T >
T * signet::forge::detail::aligned_ptr_at ( void *  base,
std::size_t  offset 
)
inline noexcept

Definition at line 130 of file tensor_bridge.hpp.

◆ crc32()

uint32_t signet::forge::detail::crc32 ( const void *  data,
size_t  length 
)
inline noexcept

Compute CRC-32 over a contiguous byte buffer (polynomial 0xEDB88320).

Note
L20: This CRC-32 is used for crash recovery only (detecting torn writes / partial records). It is NOT a cryptographic integrity check and provides no tamper-evidence guarantees — CRC-32 is trivially forgeable. For tamper-evident audit trails, use the SHA-256 hash chain in audit_chain.hpp.
Parameters
data: Pointer to input bytes.
length: Number of bytes to checksum.
Returns
CRC-32 checksum.

Definition at line 85 of file wal.hpp.

◆ crc32_combine()

uint32_t signet::forge::detail::crc32_combine ( uint32_t  crc_a,
const void *  data_b,
size_t  len_b 
)
inline noexcept

Combine two CRC regions without concatenating buffers.

Note
Currently a no-op placeholder; kept as a hook for future incremental CRC.

Definition at line 108 of file wal.hpp.

◆ dict_bit_width()

int signet::forge::detail::dict_bit_width ( size_t  dict_size)
inline

Compute the minimum bit width needed to represent dictionary indices.

Returns 0 for dict_size <= 1 (single-entry dictionaries need 0 bits), otherwise returns ceil(log2(dict_size)), which is the number of bits needed to represent index values in the range [0, dict_size - 1].

Parameters
dict_size: Number of entries in the dictionary.
Returns
Bit width (0 for dict_size <= 1).

Definition at line 66 of file dictionary.hpp.

◆ dlpack_owned_deleter()

void signet::forge::detail::dlpack_owned_deleter ( DLManagedTensor *  self)
inline

Deleter for a DLManagedTensor created from an OwnedTensor (owning).

Frees the DLPackOwnedCtx (which destroys the OwnedTensor and its data), then frees the DLManagedTensor itself.

Parameters
self: The DLManagedTensor to destroy (null-safe).

Definition at line 291 of file numpy_bridge.hpp.

◆ dlpack_view_deleter()

void signet::forge::detail::dlpack_view_deleter ( DLManagedTensor *  self)
inline

Deleter for a DLManagedTensor created from a TensorView (non-owning).

Frees the DLPackViewCtx (shape array) and the DLManagedTensor itself. Does NOT free the underlying tensor data.

Parameters
self: The DLManagedTensor to destroy (null-safe).

Definition at line 276 of file numpy_bridge.hpp.

◆ full_fsync()

int signet::forge::detail::full_fsync ( int  fd)
inline noexcept

Force durable flush to storage media.

Uses F_FULLFSYNC on macOS, FlushFileBuffers on Windows, and fsync on Linux.

Parameters
fd: File descriptor to sync.
Returns
0 on success, -1 on failure.

Definition at line 180 of file wal.hpp.

◆ is_pointer_aligned()

template<typename T >
bool signet::forge::detail::is_pointer_aligned ( const void *  ptr)
inline noexcept

Definition at line 114 of file tensor_bridge.hpp.

◆ now_ns()

int64_t signet::forge::detail::now_ns ( )
inline noexcept

Return nanoseconds since Unix epoch (cross-platform).

Uses CLOCK_REALTIME on POSIX, timespec_get on Windows.

Returns
Current wall-clock time in nanoseconds.

Definition at line 120 of file wal.hpp.

◆ parse_double()

double signet::forge::detail::parse_double ( std::string_view  sv)
inline noexcept

Definition at line 84 of file writer.hpp.

◆ parse_float()

float signet::forge::detail::parse_float ( std::string_view  sv)
inline noexcept

Definition at line 102 of file writer.hpp.

◆ plain_decode_value() [1/5]

double signet::forge::detail::plain_decode_value ( const uint8_t *  data,
size_t &  pos,
size_t  size,
double *   
)
inline

Decode a double from PLAIN format at data[pos].

Advances pos by 8.

Parameters
data: Pointer to the encoded byte stream.
pos: Current read position (updated on return).
size: Total size of the byte stream.
Returns
The decoded double value, or 0.0 if insufficient data.

Definition at line 222 of file dictionary.hpp.

◆ plain_decode_value() [2/5]

float signet::forge::detail::plain_decode_value ( const uint8_t *  data,
size_t &  pos,
size_t  size,
float *   
)
inline

Decode a float from PLAIN format at data[pos].

Advances pos by 4.

Parameters
data: Pointer to the encoded byte stream.
pos: Current read position (updated on return).
size: Total size of the byte stream.
Returns
The decoded float value, or 0.0f if insufficient data.

Definition at line 207 of file dictionary.hpp.

◆ plain_decode_value() [3/5]

int32_t signet::forge::detail::plain_decode_value ( const uint8_t *  data,
size_t &  pos,
size_t  size,
int32_t *   
)
inline

Decode an int32_t from PLAIN format at data[pos].

Advances pos by 4.

Parameters
data: Pointer to the encoded byte stream.
pos: Current read position (updated on return).
size: Total size of the byte stream.
Returns
The decoded int32 value, or 0 if insufficient data.

Definition at line 177 of file dictionary.hpp.

◆ plain_decode_value() [4/5]

int64_t signet::forge::detail::plain_decode_value ( const uint8_t *  data,
size_t &  pos,
size_t  size,
int64_t *   
)
inline

Decode an int64_t from PLAIN format at data[pos].

Advances pos by 8.

Parameters
data: Pointer to the encoded byte stream.
pos: Current read position (updated on return).
size: Total size of the byte stream.
Returns
The decoded int64 value, or 0 if insufficient data.

Definition at line 192 of file dictionary.hpp.

◆ plain_decode_value() [5/5]

std::string signet::forge::detail::plain_decode_value ( const uint8_t *  data,
size_t &  pos,
size_t  size,
std::string *   
)
inline

Decode a string from PLAIN BYTE_ARRAY format at data[pos].

Reads a 4-byte LE length prefix followed by raw bytes. Advances pos past the consumed bytes. Returns an empty string if the buffer is too small.

Parameters
data: Pointer to the encoded byte stream.
pos: Current read position (updated on return).
size: Total size of the byte stream.
Returns
The decoded string value.

Definition at line 159 of file dictionary.hpp.

◆ plain_encode_value() [1/5]

void signet::forge::detail::plain_encode_value ( std::vector< uint8_t > &  buf,
const std::string &  val 
)
inline

Append a string value in PLAIN BYTE_ARRAY format (4-byte LE length prefix + raw bytes).

Parameters
buf: Output byte buffer.
val: The string value to encode.

Definition at line 84 of file dictionary.hpp.

◆ plain_encode_value() [2/5]

void signet::forge::detail::plain_encode_value ( std::vector< uint8_t > &  buf,
double  val 
)
inline

Append a double in PLAIN format (8-byte little-endian, IEEE 754).

Parameters
buf: Output byte buffer.
val: The double value to encode.

Definition at line 139 of file dictionary.hpp.

◆ plain_encode_value() [3/5]

void signet::forge::detail::plain_encode_value ( std::vector< uint8_t > &  buf,
float  val 
)
inline

Append a float in PLAIN format (4-byte little-endian, IEEE 754).

Parameters
buf: Output byte buffer.
val: The float value to encode.

Definition at line 126 of file dictionary.hpp.

◆ plain_encode_value() [4/5]

void signet::forge::detail::plain_encode_value ( std::vector< uint8_t > &  buf,
int32_t  val 
)
inline

Append an int32_t in PLAIN format (4-byte little-endian).

Parameters
buf: Output byte buffer.
val: The int32 value to encode.

Definition at line 100 of file dictionary.hpp.

◆ plain_encode_value() [5/5]

void signet::forge::detail::plain_encode_value ( std::vector< uint8_t > &  buf,
int64_t  val 
)
inline

Append an int64_t in PLAIN format (8-byte little-endian).

Parameters
buf: Output byte buffer.
val: The int64 value to encode.

Definition at line 113 of file dictionary.hpp.

◆ read_le32()

uint32_t signet::forge::detail::read_le32 ( const uint8_t *  src)
inline noexcept

Read a 32-bit unsigned integer from little-endian byte order.

Parameters
src: Source buffer (must have at least 4 bytes).
Returns
Decoded value.

Definition at line 155 of file wal.hpp.

◆ read_le64()

uint64_t signet::forge::detail::read_le64 ( const uint8_t *  src)
inline noexcept

Read a 64-bit unsigned integer from little-endian byte order.

Parameters
src: Source buffer (must have at least 8 bytes).
Returns
Decoded value.

Definition at line 164 of file wal.hpp.

◆ release_arrow_array()

void signet::forge::detail::release_arrow_array ( ArrowArray *  array)
inline

Release callback for ArrowArray.

Frees the ArrowArrayPrivate context and optionally the data buffer (if owns_data is true).

After release, buffers and release are set to nullptr (indicating "already released").

Parameters
array: The array to release (null-safe).

Definition at line 143 of file arrow_bridge.hpp.

◆ release_arrow_schema()

void signet::forge::detail::release_arrow_schema ( ArrowSchema *  schema)
inline

Release callback for ArrowSchema.

Frees the ArrowSchemaPrivate context.

After release, all pointer fields are set to nullptr and the release function pointer itself is cleared (indicating "already released").

Parameters
schema: The schema to release (null-safe).

Definition at line 125 of file arrow_bridge.hpp.

◆ tensor_dtype_to_pybuf_format()

const char * signet::forge::detail::tensor_dtype_to_pybuf_format ( TensorDataType  dtype)
inline

Map TensorDataType to a Python struct format character (PEP 3118).

Parameters
dtype: The Signet tensor data type.
Returns
Single-character format string, or nullptr if no mapping exists (should not occur for valid TensorDataType values).

Definition at line 680 of file numpy_bridge.hpp.

◆ try_parse_double()

bool signet::forge::detail::try_parse_double ( std::string_view  sv,
double &  out 
)
inline noexcept

Try parsing a string_view as double; returns true on full parse success.

Used for CSV type-detection (auto-detect DOUBLE columns).

Definition at line 119 of file writer.hpp.

◆ write_le32()

void signet::forge::detail::write_le32 ( uint8_t *  dst,
uint32_t  v 
)
inline noexcept

Write a 32-bit unsigned integer in little-endian byte order.

Parameters
dst: Destination buffer (must have at least 4 bytes).
v: Value to write.

Definition at line 133 of file wal.hpp.

◆ write_le64()

void signet::forge::detail::write_le64 ( uint8_t *  dst,
uint64_t  v 
)
inline noexcept

Write a 64-bit unsigned integer in little-endian byte order.

Parameters
dst: Destination buffer (must have at least 8 bytes).
v: Value to write.

Definition at line 142 of file wal.hpp.