Internal implementation details for dictionary encoding. More...

Namespaces
namespace	audit

namespace	snappy

namespace	writer

Classes
class	AlignedAllocator

struct	ArrowArrayPrivate
	Heap-allocated context attached to ArrowArray.private_data. More...

struct	ArrowSchemaPrivate
	Heap-allocated context attached to ArrowSchema.private_data. More...

struct	DLPackOwnedCtx
	Context stored in DLManagedTensor.manager_ctx for owning exports. More...

struct	DLPackViewCtx
	Context stored in DLManagedTensor.manager_ctx for non-owning exports. More...

Functions
template<typename T >
bool	is_pointer_aligned (const void *ptr) noexcept

template<typename T >
T *	aligned_ptr (void *ptr) noexcept

template<typename T >
const T *	aligned_ptr (const void *ptr) noexcept

template<typename T >
T *	aligned_ptr_at (void *base, std::size_t offset) noexcept

template<typename T >
const T *	aligned_ptr_at (const void *base, std::size_t offset) noexcept

uint32_t	crc32 (const void *data, size_t length) noexcept
	Compute CRC-32 over a contiguous byte buffer (polynomial 0xEDB88320).

uint32_t	crc32_combine (uint32_t crc_a, const void *data_b, size_t len_b) noexcept
	Combine two CRC regions without concatenating buffers.

int64_t	now_ns () noexcept
	Return nanoseconds since Unix epoch (cross-platform).

void	write_le32 (uint8_t *dst, uint32_t v) noexcept
	Write a 32-bit unsigned integer in little-endian byte order.

void	write_le64 (uint8_t *dst, uint64_t v) noexcept
	Write a 64-bit unsigned integer in little-endian byte order.

uint32_t	read_le32 (const uint8_t *src) noexcept
	Read a 32-bit unsigned integer from little-endian byte order.

uint64_t	read_le64 (const uint8_t *src) noexcept
	Read a 64-bit unsigned integer from little-endian byte order.

int	full_fsync (int fd) noexcept
	Force durable flush to storage media.

int	dict_bit_width (size_t dict_size)
	Compute the minimum bit width needed to represent dictionary indices.

void	plain_encode_value (std::vector< uint8_t > &buf, const std::string &val)
	Append a string value in PLAIN BYTE_ARRAY format (4-byte LE length prefix + raw bytes).

void	plain_encode_value (std::vector< uint8_t > &buf, int32_t val)
	Append an int32_t in PLAIN format (4-byte little-endian).

void	plain_encode_value (std::vector< uint8_t > &buf, int64_t val)
	Append an int64_t in PLAIN format (8-byte little-endian).

void	plain_encode_value (std::vector< uint8_t > &buf, float val)
	Append a float in PLAIN format (4-byte little-endian, IEEE 754).

void	plain_encode_value (std::vector< uint8_t > &buf, double val)
	Append a double in PLAIN format (8-byte little-endian, IEEE 754).

std::string	plain_decode_value (const uint8_t *data, size_t &pos, size_t size, std::string *)
	Decode a string from PLAIN BYTE_ARRAY format at data[pos].

int32_t	plain_decode_value (const uint8_t *data, size_t &pos, size_t size, int32_t *)
	Decode an int32_t from PLAIN format at data[pos].

int64_t	plain_decode_value (const uint8_t *data, size_t &pos, size_t size, int64_t *)
	Decode an int64_t from PLAIN format at data[pos].

float	plain_decode_value (const uint8_t *data, size_t &pos, size_t size, float *)
	Decode a float from PLAIN format at data[pos].

double	plain_decode_value (const uint8_t *data, size_t &pos, size_t size, double *)
	Decode a double from PLAIN format at data[pos].

void	release_arrow_schema (ArrowSchema *schema)
	Release callback for ArrowSchema.

void	release_arrow_array (ArrowArray *array)
	Release callback for ArrowArray.

void	dlpack_view_deleter (DLManagedTensor *self)
	Deleter for a DLManagedTensor created from a TensorView (non-owning).

void	dlpack_owned_deleter (DLManagedTensor *self)
	Deleter for a DLManagedTensor created from an OwnedTensor (owning).

const char *	tensor_dtype_to_pybuf_format (TensorDataType dtype)
	Map TensorDataType to a Python struct format character (PEP 3118).

double	parse_double (std::string_view sv) noexcept

float	parse_float (std::string_view sv) noexcept

bool	try_parse_double (std::string_view sv, double &out) noexcept
	Try parsing a string_view as double; returns true on full parse success.

Detailed Description

Internal implementation details for dictionary encoding.

Function Documentation

◆ aligned_ptr() [1/2]

template<typename T >

const T * signet::forge::detail::aligned_ptr ( const void * ptr )

inline noexcept

Definition at line 125 of file tensor_bridge.hpp.

◆ aligned_ptr() [2/2]

template<typename T >

T * signet::forge::detail::aligned_ptr ( void * ptr )

inline noexcept

Definition at line 120 of file tensor_bridge.hpp.

◆ aligned_ptr_at() [1/2]

template<typename T >

const T * signet::forge::detail::aligned_ptr_at	(	const void *	base,
		std::size_t	offset
	)

inline noexcept

Definition at line 136 of file tensor_bridge.hpp.

◆ aligned_ptr_at() [2/2]

template<typename T >

T * signet::forge::detail::aligned_ptr_at	(	void *	base,
		std::size_t	offset
	)

inline noexcept

Definition at line 130 of file tensor_bridge.hpp.

◆ crc32()

uint32_t signet::forge::detail::crc32	(	const void *	data,
		size_t	length
	)

inline noexcept

Compute CRC-32 over a contiguous byte buffer (polynomial 0xEDB88320).

Note: L20: This CRC-32 is used for crash recovery only (detecting torn writes / partial records). It is NOT a cryptographic integrity check and provides no tamper-evidence guarantees — CRC-32 is trivially forgeable. For tamper-evident audit trails, use the SHA-256 hash chain in audit_chain.hpp.

Parameters

data	Pointer to input bytes.
length	Number of bytes to checksum.

Returns: CRC-32 checksum.

Definition at line 85 of file wal.hpp.

◆ crc32_combine()

uint32_t signet::forge::detail::crc32_combine	(	uint32_t	crc_a,
		const void *	data_b,
		size_t	len_b
	)

inline noexcept

Combine two CRC regions without concatenating buffers.

Note: Currently a no-op placeholder; kept as a hook for future incremental CRC.

Definition at line 108 of file wal.hpp.

◆ dict_bit_width()

int signet::forge::detail::dict_bit_width ( size_t dict_size )

inline

Compute the minimum bit width needed to represent dictionary indices.

Returns 0 for dict_size <= 1 (single-entry dictionaries need 0 bits), otherwise returns ceil(log2(dict_size)), which is the number of bits needed to represent index values in the range [0, dict_size - 1].

Parameters

dict_size	Number of entries in the dictionary.

Returns: Bit width (0 for dict_size <= 1).

Definition at line 66 of file dictionary.hpp.

◆ dlpack_owned_deleter()

void signet::forge::detail::dlpack_owned_deleter ( DLManagedTensor * self )

inline

Deleter for a DLManagedTensor created from an OwnedTensor (owning).

Frees the DLPackOwnedCtx (which destroys the OwnedTensor and its data), then frees the DLManagedTensor itself.

Parameters

self	The DLManagedTensor to destroy (null-safe).

Definition at line 291 of file numpy_bridge.hpp.

◆ dlpack_view_deleter()

void signet::forge::detail::dlpack_view_deleter ( DLManagedTensor * self )

inline

Deleter for a DLManagedTensor created from a TensorView (non-owning).

Frees the DLPackViewCtx (shape array) and the DLManagedTensor itself. Does NOT free the underlying tensor data.

Parameters

self	The DLManagedTensor to destroy (null-safe).

Definition at line 276 of file numpy_bridge.hpp.

◆ full_fsync()

int signet::forge::detail::full_fsync ( int fd )

inline noexcept

Force durable flush to storage media.

Uses F_FULLFSYNC on macOS, FlushFileBuffers on Windows, and fsync on Linux.

Parameters

fd	File descriptor to sync.

Returns: 0 on success, -1 on failure.

Definition at line 180 of file wal.hpp.

◆ is_pointer_aligned()

template<typename T >

bool signet::forge::detail::is_pointer_aligned ( const void * ptr )

inline noexcept

Definition at line 114 of file tensor_bridge.hpp.

◆ now_ns()

int64_t signet::forge::detail::now_ns ( )

inline noexcept

Return nanoseconds since Unix epoch (cross-platform).

Uses CLOCK_REALTIME on POSIX, timespec_get on Windows.

Returns: Current wall-clock time in nanoseconds.

Definition at line 120 of file wal.hpp.

◆ parse_double()

double signet::forge::detail::parse_double ( std::string_view sv )

inline noexcept

Definition at line 84 of file writer.hpp.

◆ parse_float()

float signet::forge::detail::parse_float ( std::string_view sv )

inline noexcept

Definition at line 102 of file writer.hpp.

◆ plain_decode_value() [1/5]

double signet::forge::detail::plain_decode_value	(	const uint8_t *	data,
		size_t &	pos,
		size_t	size,
		double *
	)

inline

Decode a double from PLAIN format at data[pos].

Advances pos by 8.

Parameters

data	Pointer to the encoded byte stream.
pos	Current read position (updated on return).
size	Total size of the byte stream.

Returns: The decoded double value, or 0.0 if insufficient data.

Definition at line 222 of file dictionary.hpp.

◆ plain_decode_value() [2/5]

float signet::forge::detail::plain_decode_value	(	const uint8_t *	data,
		size_t &	pos,
		size_t	size,
		float *
	)

inline

Decode a float from PLAIN format at data[pos].

Advances pos by 4.

Parameters

data	Pointer to the encoded byte stream.
pos	Current read position (updated on return).
size	Total size of the byte stream.

Returns: The decoded float value, or 0.0f if insufficient data.

Definition at line 207 of file dictionary.hpp.

◆ plain_decode_value() [3/5]

int32_t signet::forge::detail::plain_decode_value	(	const uint8_t *	data,
		size_t &	pos,
		size_t	size,
		int32_t *
	)

inline

Decode an int32_t from PLAIN format at data[pos].

Advances pos by 4.

Parameters

data	Pointer to the encoded byte stream.
pos	Current read position (updated on return).
size	Total size of the byte stream.

Returns: The decoded int32 value, or 0 if insufficient data.

Definition at line 177 of file dictionary.hpp.

◆ plain_decode_value() [4/5]

int64_t signet::forge::detail::plain_decode_value	(	const uint8_t *	data,
		size_t &	pos,
		size_t	size,
		int64_t *
	)

inline

Decode an int64_t from PLAIN format at data[pos].

Advances pos by 8.

Parameters

data	Pointer to the encoded byte stream.
pos	Current read position (updated on return).
size	Total size of the byte stream.

Returns: The decoded int64 value, or 0 if insufficient data.

Definition at line 192 of file dictionary.hpp.

◆ plain_decode_value() [5/5]

std::string signet::forge::detail::plain_decode_value	(	const uint8_t *	data,
		size_t &	pos,
		size_t	size,
		std::string *
	)

inline

Decode a string from PLAIN BYTE_ARRAY format at data[pos].

Reads a 4-byte LE length prefix followed by raw bytes. Advances pos past the consumed bytes. Returns an empty string if the buffer is too small.

Parameters

data	Pointer to the encoded byte stream.
pos	Current read position (updated on return).
size	Total size of the byte stream.

Returns: The decoded string value.

Definition at line 159 of file dictionary.hpp.

◆ plain_encode_value() [1/5]

void signet::forge::detail::plain_encode_value	(	std::vector< uint8_t > &	buf,
		const std::string &	val
	)

inline

Append a string value in PLAIN BYTE_ARRAY format (4-byte LE length prefix + raw bytes).

Parameters

buf	Output byte buffer.
val	The string value to encode.

Definition at line 84 of file dictionary.hpp.

◆ plain_encode_value() [2/5]

void signet::forge::detail::plain_encode_value	(	std::vector< uint8_t > &	buf,
		double	val
	)

inline

Append a double in PLAIN format (8-byte little-endian, IEEE 754).

Parameters

buf	Output byte buffer.
val	The double value to encode.

Definition at line 139 of file dictionary.hpp.

◆ plain_encode_value() [3/5]

void signet::forge::detail::plain_encode_value	(	std::vector< uint8_t > &	buf,
		float	val
	)

inline

Append a float in PLAIN format (4-byte little-endian, IEEE 754).

Parameters

buf	Output byte buffer.
val	The float value to encode.

Definition at line 126 of file dictionary.hpp.

◆ plain_encode_value() [4/5]

void signet::forge::detail::plain_encode_value	(	std::vector< uint8_t > &	buf,
		int32_t	val
	)

inline

Append an int32_t in PLAIN format (4-byte little-endian).

Parameters

buf	Output byte buffer.
val	The int32 value to encode.

Definition at line 100 of file dictionary.hpp.

◆ plain_encode_value() [5/5]

void signet::forge::detail::plain_encode_value	(	std::vector< uint8_t > &	buf,
		int64_t	val
	)

inline

Append an int64_t in PLAIN format (8-byte little-endian).

Parameters

buf	Output byte buffer.
val	The int64 value to encode.

Definition at line 113 of file dictionary.hpp.

◆ read_le32()

uint32_t signet::forge::detail::read_le32 ( const uint8_t * src )

inline noexcept

Read a 32-bit unsigned integer from little-endian byte order.

Parameters

src	Source buffer (must have at least 4 bytes).

Returns: Decoded value.

Definition at line 155 of file wal.hpp.

◆ read_le64()

uint64_t signet::forge::detail::read_le64 ( const uint8_t * src )

inline noexcept

Read a 64-bit unsigned integer from little-endian byte order.

Parameters

src	Source buffer (must have at least 8 bytes).

Returns: Decoded value.

Definition at line 164 of file wal.hpp.

◆ release_arrow_array()

void signet::forge::detail::release_arrow_array ( ArrowArray * array )

inline

Release callback for ArrowArray.

Frees the ArrowArrayPrivate context and optionally the data buffer (if owns_data is true).

After release, buffers and release are set to nullptr (indicating "already released").

Parameters

array	The array to release (null-safe).

Definition at line 143 of file arrow_bridge.hpp.

◆ release_arrow_schema()

void signet::forge::detail::release_arrow_schema ( ArrowSchema * schema )

inline

Release callback for ArrowSchema.

Frees the ArrowSchemaPrivate context.

After release, all pointer fields are set to nullptr and the release function pointer itself is cleared (indicating "already released").

Parameters

schema	The schema to release (null-safe).

Definition at line 125 of file arrow_bridge.hpp.

◆ tensor_dtype_to_pybuf_format()

const char * signet::forge::detail::tensor_dtype_to_pybuf_format ( TensorDataType dtype )

inline

Map TensorDataType to a Python struct format character (PEP 3118).

Parameters

dtype	The Signet tensor data type.

Returns: Single-character format string, or nullptr if no mapping exists (should not occur for valid TensorDataType values).

Definition at line 680 of file numpy_bridge.hpp.

◆ try_parse_double()

bool signet::forge::detail::try_parse_double	(	std::string_view	sv,
		double &	out
	)

inline noexcept

Try parsing a string_view as double; returns true on full parse success.

Used for CSV type-detection (auto-detect DOUBLE columns).

Definition at line 119 of file writer.hpp.

◆ write_le32()

void signet::forge::detail::write_le32	(	uint8_t *	dst,
		uint32_t	v
	)

inline noexcept

Write a 32-bit unsigned integer in little-endian byte order.

Parameters

dst	Destination buffer (must have at least 4 bytes).
v	Value to write.

Definition at line 133 of file wal.hpp.

◆ write_le64()

void signet::forge::detail::write_le64	(	uint8_t *	dst,
		uint64_t	v
	)

inline noexcept

Write a 64-bit unsigned integer in little-endian byte order.

Parameters

dst	Destination buffer (must have at least 8 bytes).
v	Value to write.

Definition at line 142 of file wal.hpp.

Namespaces

Classes

Functions

Detailed Description

Function Documentation

◆ aligned_ptr() [1/2]

◆ aligned_ptr() [2/2]

◆ aligned_ptr_at() [1/2]

◆ aligned_ptr_at() [2/2]

◆ crc32()

◆ crc32_combine()

◆ dict_bit_width()

◆ dlpack_owned_deleter()

◆ dlpack_view_deleter()

◆ full_fsync()

◆ is_pointer_aligned()

◆ now_ns()

◆ parse_double()

◆ parse_float()

◆ plain_decode_value() [1/5]

◆ plain_decode_value() [2/5]

◆ plain_decode_value() [3/5]

◆ plain_decode_value() [4/5]

◆ plain_decode_value() [5/5]

◆ plain_encode_value() [1/5]

◆ plain_encode_value() [2/5]

◆ plain_encode_value() [3/5]

◆ plain_encode_value() [4/5]

◆ plain_encode_value() [5/5]

◆ read_le32()

◆ read_le64()

◆ release_arrow_array()

◆ release_arrow_schema()

◆ tensor_dtype_to_pybuf_format()

◆ try_parse_double()

◆ write_le32()

◆ write_le64()