概述
Zig将其压缩API精简为实用核心:高质量解压缩器,可插入新的std.Io.Reader/Writer接口,并馈送TAR和ZIP等格式,而不会产生隐藏副作用(参见"reworked std.compress.flate"及 flate.zig)。将这些部分组合在一起,您可以恢复日志、打包资产或将注册表直接读入内存,同时保持相同的显式资源管理纪律。
由于Zig将归档视为简单的字节流,挑战从依赖魔术辅助函数转变为组合正确的迭代器、缓冲区和元数据检查。掌握这里的解压缩构建块,可为后续的包管道和部署工具打下基础(参见 tar.zig、zip.zig)。
学习目标
- 直接针对 std.Io.Reader/Writer 端点驱动 std.compress.flate.Decompress、std.compress.lzma2.decompress 等解压缩器(参见 Decompress.zig、lzma2.zig、Writer.zig)。
- 选择历史缓冲区、流式传输限制和分配器,确保在调试和发布构建下解压缩都保持内存安全。10
- 动态生成小型TAR档案并迭代它们,而不触及磁盘状态。28
- 检查和提取ZIP中央目录条目,同时强制执行文件系统卫生和压缩方法约束。36
流式解压缩接口
Zig的解压缩器使用相同的流式方言:您可以将任何读取器传递给它们,并可选择提供临时缓冲区,它们会将有效载荷发送给您已经拥有的写入器。这种设计让您完全控制分配、错误传播和刷新行为。22
实际中的Flate容器
Deflate风格的有效载荷(原始、zlib、gzip)依赖最多32 KiB的历史窗口。Zig 0.15.2允许您在将数据直接传输到另一个写入器时跳过该窗口的分配——传递 &.{},解码器将以最小缓冲区调用 streamRemaining。
const std = @import("std");
/// Inflates an embedded zlib payload into a fixed stack buffer and prints
/// the decoded length and text to stdout.
pub fn main() !void {
    // Buffered stdout writer; nothing is guaranteed to reach the terminal
    // until the explicit flush at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // Compressed input, decoded below with the `.zlib` container setting.
    const zlib_payload = [_]u8{
        0x78, 0x9c, 0x0b, 0x2e, 0x29, 0x4a, 0x4d, 0xcc, 0xcd, 0xcc, 0x4b, 0x57, 0x48, 0x49,
        0x4d, 0xce, 0xcf, 0x2d, 0x28, 0x4a, 0x2d, 0x2e, 0xce, 0xcc, 0xcf, 0x53, 0xc8, 0x4e,
        0x4d, 0x2d, 0x28, 0x56, 0x28, 0xc9, 0xcf, 0xcf, 0x29, 0x56, 0x00, 0x0a, 0xa6, 0x64,
        0x26, 0x97, 0x24, 0x26, 0xe5, 0xa4, 0xea, 0x71, 0x01, 0x00, 0xdf, 0xba, 0x12, 0xa6,
    };

    var input: std.Io.Reader = .fixed(&zlib_payload);
    // The empty slice is passed as the decompressor's buffer argument.
    var decoder = std.compress.flate.Decompress.init(&input, .zlib, &.{});

    // Fixed-capacity sink for the decoded bytes.
    var text_storage: [128]u8 = undefined;
    var text_writer = std.Io.Writer.fixed(&text_storage);

    // streamRemaining reports how many bytes were written to the sink.
    const text_len = try decoder.reader.streamRemaining(&text_writer);
    const text = text_storage[0..text_len];

    try out.print("decoded ({d} bytes): {s}\n", .{ text.len, text });
    try out.flush();
}
$ zig run inflate_greeting.zig
decoded (49 bytes): Streaming decompression keeps tools predictable.
std.Io.Writer.fixed 提供具有确定性容量的栈分配接收器;之后务必刷新手动管理的stdout缓冲区,以避免在进程退出时丢失输出。1
无外部工具的LZMA2
一些注册表仍然提供LZMA2帧,用于确定性逐字节有效载荷。Zig将解码器包装在单个辅助函数后面,为您增长一个 std.Io.Writer.Allocating ——非常适合短配置包或固件块。12
const std = @import("std");
/// Decodes an embedded LZMA2 stream into a growable in-memory writer and
/// prints the decoded length and bytes to stdout.
pub fn main() !void {
    // Buffered stdout writer; flushed explicitly at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // The deferred assert checks the result of gpa.deinit() on exit.
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const gpa_alloc = gpa.allocator();

    // Embedded LZMA2 input bytes.
    const lzma2_payload = [_]u8{
        0x01, 0x00, 0x05, 0x48, 0x65, 0x6c, 0x6c, 0x6f, 0x0a, 0x02, 0x00, 0x06, 0x57, 0x6f,
        0x72, 0x6c, 0x64, 0x21, 0x0a, 0x00,
    };
    var input_stream = std.io.fixedBufferStream(&lzma2_payload);

    // Allocating writer that collects the decoded output.
    var collector = std.Io.Writer.Allocating.init(gpa_alloc);
    defer collector.deinit();

    try std.compress.lzma2.decompress(gpa_alloc, input_stream.reader(), &collector.writer);

    // The decoded bytes are the first `end` bytes of the writer's buffer.
    const sink = &collector.writer;
    const plain = sink.buffer[0..sink.end];

    try out.print("lzma2 decoded ({d} bytes):\n{s}\n", .{ plain.len, plain });
    try out.flush();
}
$ zig run lzma2_memory_decode.zig
lzma2 decoded (13 bytes):
Hello
World!
归档工作流
掌握了解压缩原语后,归档成为组合练习:特定格式的迭代器为您提供元数据,您决定是否缓冲、丢弃或流式传输到磁盘。28
完全在内存中进行的TAR往返
std.tar.Writer 输出确定性的512字节块,因此您可以在内存中组装小型归档包、检查它们,然后再决定是否将其持久化。24
const std = @import("std");
/// Builds a small TAR archive in a fixed in-memory buffer, then iterates it
/// and prints each entry's name, kind and size, plus the contents of files.
pub fn main() !void {
    // Buffered stdout writer; flushed explicitly at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    // --- Write phase: emit the archive into a fixed buffer. ---
    var tar_bytes: [4096]u8 = undefined;
    var tar_sink = std.Io.Writer.fixed(&tar_bytes);
    var builder = std.tar.Writer{ .underlying_writer = &tar_sink };

    try builder.writeDir("reports", .{ .mode = 0o755 });
    try builder.writeFileBytes(
        "reports/summary.txt",
        "cpu=28%\nmem=512MiB\n",
        .{ .mode = 0o644 },
    );

    // Only the first `end` bytes of the sink's buffer have been written.
    const tar_image = tar_sink.buffer[0..tar_sink.end];
    try out.print("tar archive is {d} bytes and holds:\n", .{tar_image.len});

    // --- Read phase: iterate the bytes just produced. ---
    var tar_input: std.Io.Reader = .fixed(tar_image);
    var path_storage: [std.fs.max_path_bytes]u8 = undefined;
    var link_storage: [std.fs.max_path_bytes]u8 = undefined;
    var entries = std.tar.Iterator.init(&tar_input, .{
        .file_name_buffer = &path_storage,
        .link_name_buffer = &link_storage,
    });

    while (try entries.next()) |item| {
        const kind_name = @tagName(item.kind);
        try out.print("- {s} ({s}, {d} bytes)\n", .{ item.name, kind_name, item.size });

        // Only regular files have their payload printed.
        if (item.kind != .file) continue;

        // Stream the file's payload into a small fixed buffer.
        var body_storage: [128]u8 = undefined;
        var body_sink = std.Io.Writer.fixed(&body_storage);
        try entries.streamRemaining(item, &body_sink);

        const body = body_storage[0..body_sink.end];
        try out.print(" contents: {s}\n", .{body});
    }

    try out.flush();
}
$ zig run tar_roundtrip.zig
tar archive is 1536 bytes and holds:
- reports (directory, 0 bytes)
- reports/summary.txt (file, 19 bytes)
contents: cpu=28%
mem=512MiB
在常规文件上调用 Iterator.next 后,您必须用 streamRemaining 排空有效载荷;否则,下一个头部将对齐错误,迭代器将返回 error.UnexpectedEndOfStream。
安全地窥视ZIP中央目录
ZIP支持通过 std.zip.Iterator 公开中央目录,将提取策略留给您。通过 std.testing.tmpDir 路由条目可保持构件隔离,同时便于您验证压缩方法并检查内容(参见 testing.zig)。
const std = @import("std");
/// Copies an embedded ZIP archive into a temporary directory, extracts every
/// entry there, and prints each entry's name, compression method and size,
/// followed by its contents (as text for `.txt` names, otherwise as hex).
pub fn main() !void {
    // Buffered stdout writer; flushed explicitly at the end.
    var out_storage: [4096]u8 = undefined;
    var out_file_writer = std.fs.File.stdout().writer(&out_storage);
    const out = &out_file_writer.interface;

    const zip_image = @embedFile("demo.zip");

    // The deferred assert checks the result of gpa.deinit() on exit.
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const gpa_alloc = gpa.allocator();

    // The archive copy and everything extracted from it go in this directory.
    var scratch = std.testing.tmpDir(.{});
    defer scratch.cleanup();

    // Write the embedded archive to a real file and rewind it before reading.
    var zip_handle = try scratch.dir.createFile("demo.zip", .{ .read = true, .truncate = true });
    defer {
        zip_handle.close();
        scratch.dir.deleteFile("demo.zip") catch {};
    }
    try zip_handle.writeAll(zip_image);
    try zip_handle.seekTo(0);

    var io_storage: [4096]u8 = undefined;
    var zip_reader = zip_handle.reader(&io_storage);
    var entries = try std.zip.Iterator.init(&zip_reader);

    var path_storage: [std.fs.max_path_bytes]u8 = undefined;
    try out.print("zip archive contains:\n", .{});

    while (try entries.next()) |item| {
        // The entry name is read back from path_storage after extraction.
        try item.extract(&zip_reader, .{}, &path_storage, scratch.dir);
        const path = path_storage[0..item.filename_len];

        const method_name = @tagName(item.compression_method);
        try out.print(
            "- {s} ({s}, {d} bytes)\n",
            .{ path, method_name, item.uncompressed_size },
        );

        // Names ending in '/' are skipped; there is no file to open.
        const is_dir_entry = path.len != 0 and path[path.len - 1] == '/';
        if (is_dir_entry) continue;

        // Read the extracted file back in full.
        var extracted = try scratch.dir.openFile(path, .{});
        defer extracted.close();

        const meta = try extracted.stat();
        const byte_count: usize = @intCast(meta.size);
        const storage = try gpa_alloc.alloc(u8, byte_count);
        defer gpa_alloc.free(storage);

        const got = try extracted.readAll(storage);
        const data = storage[0..got];

        if (std.mem.endsWith(u8, path, ".txt")) {
            try out.print(" text: {s}\n", .{data});
            continue;
        }

        // Anything else: hex dump, 16 bytes per row.
        try out.print(" bytes:", .{});
        for (data, 0..) |value, pos| {
            const sep = if (pos % 16 == 0) "\n " else " ";
            try out.print("{s}{X:0>2}", .{ sep, value });
        }
        try out.print("\n", .{});
    }

    try out.flush();
}
$ zig run zip_iterator_preview.zig
zip archive contains:
- demo/readme.txt (store, 34 bytes)
text: Decompression from Zig streaming.
- demo/raw.bin (store, 4 bytes)
bytes:
00 01 02 03
std.zip.Entry.extract 仅支持 store 和 deflate;预先拒绝其他方法,或在互操作性需要时使用第三方库。
混合源的模式目录
混合这些技术来充实包注册表的清单,在签名检查之前解压缩发布工件,或为GPU上传准备二进制blob——所有这些都不离开Zig的标准工具箱。35
注意事项与警告
- 向 std.compress.flate.Decompress.init 传递零长度缓冲区会禁用历史重用,但大型归档受益于重用 [flate.max_window_len]u8 临时数组。
- TAR迭代器保持关于未读文件字节的状态;在前进到下一个头部之前始终流式传输或丢弃它们。
- ZIP提取仅在 allow_backslashes = true 时规范化反斜杠;强制使用正斜杠以避免Windows上的目录遍历错误。33