Signet Forge 0.1.0
C++20 Parquet library with AI-native extensions
DEMO
Loading...
Searching...
No Matches
gzip.hpp
Go to the documentation of this file.
1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Copyright 2026 Johnson Ogundeji
3#pragma once
4
24
25#ifdef SIGNET_HAS_GZIP
26
28
29#include <zlib.h>
30
31#include <cstdint>
32#include <limits>
33#include <string>
34#include <vector>
35
36namespace signet::forge {
37
46class GzipCodec : public CompressionCodec {
47public:
53 explicit GzipCodec(int compression_level = Z_DEFAULT_COMPRESSION)
54 : level_(compression_level) {}
55
66 [[nodiscard]] expected<std::vector<uint8_t>> compress(
67 const uint8_t* data, size_t size) const override {
68
69 // Pessimistic upper bound: deflate worst case is ~0.1% expansion
70 // plus gzip header/trailer (~18 bytes). deflateBound gives an
71 // accurate upper bound once the stream is initialized, but we
72 // need a reasonable initial allocation before that.
73 // CWE-190: Integer Overflow (zlib uses uInt for sizes)
74 if (size > (std::numeric_limits<uInt>::max)()) {
75 return Error{ErrorCode::INTERNAL_ERROR,
76 "GZIP: input exceeds uInt limit"};
77 }
78
79 z_stream stream{};
80 stream.next_in = const_cast<Bytef*>(data);
81 stream.avail_in = static_cast<uInt>(size);
82
83 // windowBits = 15 + 16 enables gzip format (not raw deflate).
84 int ret = deflateInit2(&stream,
85 level_,
86 Z_DEFLATED,
87 15 + 16, // gzip framing
88 8, // default memLevel
89 Z_DEFAULT_STRATEGY);
90 if (ret != Z_OK) {
91 return Error{ErrorCode::INTERNAL_ERROR,
92 "GZIP: deflateInit2 failed (zlib error " +
93 std::to_string(ret) + ")"};
94 }
95
96 // Use deflateBound for an accurate output size estimate.
97 uLong bound = deflateBound(&stream, static_cast<uLong>(size));
98 std::vector<uint8_t> out(bound);
99
100 stream.next_out = out.data();
101 stream.avail_out = static_cast<uInt>(out.size());
102
103 ret = deflate(&stream, Z_FINISH);
104 if (ret != Z_STREAM_END) {
105 deflateEnd(&stream);
106 return Error{ErrorCode::INTERNAL_ERROR,
107 "GZIP: deflate failed (zlib error " +
108 std::to_string(ret) + ")"};
109 }
110
111 size_t compressed_size = stream.total_out;
112 deflateEnd(&stream);
113
114 out.resize(compressed_size);
115 return out;
116 }
117
130 [[nodiscard]] expected<std::vector<uint8_t>> decompress(
131 const uint8_t* data, size_t size,
132 size_t uncompressed_size) const override {
133
134 static constexpr size_t MAX_DECOMPRESS_SIZE = 256 * 1024 * 1024; // 256 MB
135 if (uncompressed_size > MAX_DECOMPRESS_SIZE)
136 return Error{ErrorCode::INVALID_ARGUMENT, "Decompression size exceeds 256 MB limit"};
137
138 if (uncompressed_size == 0) {
139 return std::vector<uint8_t>{};
140 }
141
142 // CWE-190: Integer Overflow (zlib uses uInt for sizes)
143 if (size > (std::numeric_limits<uInt>::max)()) {
144 return Error{ErrorCode::INTERNAL_ERROR,
145 "GZIP: compressed input exceeds uInt limit"};
146 }
147 // CWE-190: Integer Overflow (zlib uses uInt for sizes)
148 if (uncompressed_size > (std::numeric_limits<uInt>::max)()) {
149 return Error{ErrorCode::INTERNAL_ERROR,
150 "GZIP: uncompressed size exceeds uInt limit"};
151 }
152
153 std::vector<uint8_t> out(uncompressed_size);
154
155 z_stream stream{};
156 stream.next_in = const_cast<Bytef*>(data);
157 stream.avail_in = static_cast<uInt>(size);
158 stream.next_out = out.data();
159 stream.avail_out = static_cast<uInt>(out.size());
160
161 // windowBits = 15 + 16 to accept gzip format.
162 int ret = inflateInit2(&stream, 15 + 16);
163 if (ret != Z_OK) {
164 return Error{ErrorCode::CORRUPT_PAGE,
165 "GZIP: inflateInit2 failed (zlib error " +
166 std::to_string(ret) + ")"};
167 }
168
169 ret = inflate(&stream, Z_FINISH);
170 if (ret != Z_STREAM_END) {
171 inflateEnd(&stream);
172 return Error{ErrorCode::CORRUPT_PAGE,
173 "GZIP: inflate failed (zlib error " +
174 std::to_string(ret) +
175 (stream.msg ? std::string(", ") + stream.msg
176 : std::string()) + ")"};
177 }
178
179 size_t decompressed_size = stream.total_out;
180 inflateEnd(&stream);
181
182 if (decompressed_size != uncompressed_size) {
183 return Error{ErrorCode::CORRUPT_PAGE,
184 "GZIP: decompressed " +
185 std::to_string(decompressed_size) +
186 " bytes but expected " +
187 std::to_string(uncompressed_size)};
188 }
189
190 return out;
191 }
192
195
197 [[nodiscard]] Compression codec_type() const override {
198 return Compression::GZIP;
199 }
200
202 [[nodiscard]] const char* name() const override {
203 return "gzip";
204 }
205
207
208private:
210 int level_;
211};
212
213// ===========================================================================
214// Auto-registration helper
215// ===========================================================================
216
225inline void register_gzip_codec(int level = Z_DEFAULT_COMPRESSION) {
226 CodecRegistry::instance().register_codec(std::make_unique<GzipCodec>(level));
227}
228
229} // namespace signet::forge
230
231#endif // SIGNET_HAS_GZIP
void register_codec(std::unique_ptr< CompressionCodec > codec)
Register a codec, transferring ownership to the registry.
Definition codec.hpp:105
static CodecRegistry & instance()
Access the process-wide singleton instance.
Definition codec.hpp:94
Compression codec interface and registry for Signet Forge.
Compression
Parquet compression codecs.
Definition types.hpp:115
expected< std::vector< uint8_t > > decompress(Compression codec, const uint8_t *data, size_t size, size_t uncompressed_size)
Decompress data using the specified codec via the global CodecRegistry.
Definition codec.hpp:213
expected< std::vector< uint8_t > > compress(Compression codec, const uint8_t *data, size_t size)
Compress data using the specified codec via the global CodecRegistry.
Definition codec.hpp:183