Zigbook – Learn the Zig Programming Language

概述

这个项目将原始字节转换为整洁、对齐感知的十六进制视图。我们将增量读取文件，将每行格式化为偏移量: 十六进制 ASCII，并在不同平台间保持输出稳定。写入器接口通过std.fs.File.writer和std.Io.Writer使用缓冲的stdout，如File.zig和Io.zig中所述。

格式化程序默认每行打印16个字节，并可通过--width N（4..32）进行配置。字节被分组为8|8以便于扫描，不可打印的ASCII在右侧边栏中显示为点，如fmt.zig和#命令行标志中所述。

学习目标

解析CLI标志并用std.fmt.parseInt验证数字。
用固定缓冲区流式传输文件并组装精确宽度的输出行。
使用非弃用的File.Writer + Io.Writer来缓冲stdout并干净地刷新。

构建转储

我们将连接三个部分：一个微小的CLI解析器，一个行格式化程序，以及一个以精确宽度块向格式化程序提供数据的循环。该实现依赖于Zig的切片和显式生命周期（在释放参数之前复制路径）以保持健壮性；参见process.zig和#错误处理。

Zig

const std = @import("std");

// Chapter 9 – Project: Hexdump
//
// A small, alignment-aware hexdump that prints:
//   OFFSET: 16 hex bytes (grouped 8|8)  ASCII
// Default width is 16 bytes per line; override with --width N (4..32).
//
// Usage:
//   zig run hexdump.zig -- <path>
//   zig run hexdump.zig -- --width 8 <path>
// 第9章 - 项目：十六进制转储
//
// 一个小的、对齐感知的十六进制转储工具，打印格式为：
//   OFFSET: 16个十六进制字节（8|8分组）  ASCII
// 默认每行宽度为16字节；可通过--width N覆盖（4..32）。
//
// 用法：
//   zig run hexdump.zig -- <path>
//   zig run hexdump.zig -- --width 8 <path>

/// Command-line options for the hexdump tool.
const Cli = struct {
    /// Number of bytes rendered per output line; defaults to 16.
    width: usize = 16,
    /// Path of the file to dump; an empty slice until one is assigned.
    path: []const u8 = "",
};

/// Print the one-line usage summary via `std.debug.print`.
fn printUsage() void {
    const usage_text = "usage: hexdump [--width N] <path>\n";
    std.debug.print(usage_text, .{});
}

/// Parse the process arguments into a `Cli`.
///
/// Accepted form: `hexdump [--width N] <path>`. Leading `--width N` pairs are
/// consumed in order (the last one wins) and the next argument is the path.
///
/// Prints usage and exits with status 0 when no arguments are given or the
/// sole argument is `--help`. Exits with status 2 when `--width` has no value,
/// when its value is not a base-10 integer in 4..32, or when the path is missing.
///
/// The returned `path` is a copy allocated with `allocator`; the caller owns
/// it. Allocation failures are returned as errors.
fn parseArgs(allocator: std.mem.Allocator) !Cli {
    var cli: Cli = .{};
    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len == 1 or (args.len == 2 and std.mem.eql(u8, args[1], "--help"))) {
        printUsage();
        std.process.exit(0);
    }

    var i: usize = 1;
    while (i < args.len and std.mem.eql(u8, args[i], "--width")) : (i += 2) {
        // A trailing `--width` with nothing after it must not fall through
        // and be mistaken for the file path.
        if (i + 1 >= args.len) {
            std.debug.print("error: --width requires a value\n", .{});
            printUsage();
            std.process.exit(2);
        }

        const val = args[i + 1];
        cli.width = std.fmt.parseInt(usize, val, 10) catch {
            std.debug.print("error: invalid width '{s}'\n", .{val});
            std.process.exit(2);
        };
        if (cli.width < 4 or cli.width > 32) {
            std.debug.print("error: width must be between 4 and 32\n", .{});
            std.process.exit(2);
        }
    }

    if (i >= args.len) {
        std.debug.print("error: expected <path>\n", .{});
        printUsage();
        std.process.exit(2);
    }

    // `args` is freed when this function returns, so hand back a copy of the path.
    cli.path = try allocator.dupe(u8, args[i]);
    return cli;
}

/// Report whether `c` is printable ASCII, i.e. in the range space (0x20)
/// through tilde (0x7E) inclusive.
fn isPrintable(c: u8) bool {
    return switch (c) {
        0x20...0x7E => true,
        else => false,
    };
}

/// Write one formatted hexdump row to `stdout`.
///
/// Row layout: `offset` as eight zero-padded uppercase hex digits followed by
/// ": ", then `width` three-character hex cells with one extra space after the
/// first `width / 2` cells, then two spaces, then `width` ASCII cells and a
/// newline. Cells at index `bytes.len` or beyond are left blank so a short
/// final row still lines up. Non-printable bytes appear as '.' in the ASCII
/// cells. Writer errors are returned to the caller.
fn dumpLine(stdout: *std.Io.Writer, offset: usize, bytes: []const u8, width: usize) !void {
    try stdout.print("{X:0>8}: ", .{offset});

    // Hex column: one "XX " cell per position, blank when no byte is present.
    const half = width / 2;
    for (0..width) |col| {
        if (col >= bytes.len) {
            try stdout.writeAll("   ");
        } else {
            try stdout.print("{X:0>2} ", .{bytes[col]});
        }
        // Extra gap separating the two halves of the row.
        if (col + 1 == half) try stdout.writeByte(' ');
    }

    // Separator between the hex column and the ASCII gutter.
    try stdout.writeAll("  ");

    // ASCII gutter: printable bytes verbatim, others as '.', padding as ' '.
    for (0..width) |col| {
        const cell: u8 = if (col >= bytes.len)
            ' '
        else if (isPrintable(bytes[col]))
            bytes[col]
        else
            '.';
        try stdout.writeByte(cell);
    }
    try stdout.writeByte('\n');
}

/// Entry point: parse the CLI, open the requested file, and stream it to
/// stdout as hexdump rows of exactly `cli.width` bytes (the last row may be
/// shorter).
///
/// Exits with status 1 if the file cannot be opened. Read and write errors
/// are returned to the caller.
pub fn main() !void {
    const allocator = std.heap.page_allocator;
    const cli = try parseArgs(allocator);
    // parseArgs returns a heap copy of the path; release it on normal return.
    defer allocator.free(cli.path);

    var file = std.fs.cwd().openFile(cli.path, .{ .mode = .read_only }) catch {
        std.debug.print("error: unable to open '{s}'\n", .{cli.path});
        std.process.exit(1);
    };
    defer file.close();

    // Buffered stdout using the File.Writer + Io.Writer interface.
    var out_buf: [16 * 1024]u8 = undefined;
    var file_writer = std.fs.File.writer(std.fs.File.stdout(), &out_buf);
    const stdout = &file_writer.interface;

    var offset: usize = 0;
    // Holds the bytes of a row that straddles two reads; 64 bytes is enough
    // for the maximum width of 32.
    var carry: [64]u8 = undefined;
    var carry_len: usize = 0;

    var buf: [64 * 1024]u8 = undefined;
    while (true) {
        const n = try file.read(buf[0..]);
        // A zero-length read is end of file: stop here instead of reading
        // again after the final partial row has been flushed.
        if (n == 0) break;

        var idx: usize = 0;
        while (idx < n) {
            // Top up the pending row from the current chunk.
            const take = @min(cli.width - carry_len, n - idx);
            @memcpy(carry[carry_len..][0..take], buf[idx..][0..take]);
            carry_len += take;
            idx += take;

            if (carry_len == cli.width) {
                try dumpLine(stdout, offset, carry[0..carry_len], cli.width);
                offset += carry_len;
                carry_len = 0;
            }
        }
    }

    // Emit whatever is left as a short, padded final row.
    if (carry_len > 0) {
        try dumpLine(stdout, offset, carry[0..carry_len], cli.width);
    }
    try file_writer.end();
}

运行

Shell

$ zig run hexdump.zig -- sample.txt

输出

Shell

00000000: 48 65 6C 6C 6F 2C 20 48  65 78 64 75 6D 70 21 0A   Hello, Hexdump!.

ASCII边栏用.替换不可打印的字节；文件末尾的换行符在十六进制列中显示为0A，在右侧边栏中显示为一个点。

宽度和分组

传递--width N来更改每行的字节数。分组仍然将行分成两半（N/2）以保持视觉对齐。

运行

Shell

$ zig run hexdump.zig -- --width 8 sample.txt

输出

Shell

00000000: 48 65 6C 6C  6F 2C 20 48   Hello, H
00000008: 65 78 64 75  6D 70 21 0A   exdump!.

行格式化程序会填充十六进制和ASCII区域，以便在最后一行，当字节可能无法填满完整宽度时，列能很好地对齐。

注意与警告

避免使用已弃用的I/O接口；此示例使用File.writer外加一个Io.Writer缓冲区，并调用end()来刷新和设置最终位置。
十六进制格式化保持简单——除了偏移量外，没有-C风格的索引列。扩展格式化程序是一个简单的后续练习。
参数生命周期很重要：如果你在使用cli.path之前释放args，请复制路径字符串。

练习

添加--group N来控制额外空格的位置（当前为N = width/2）。
支持--offset 0xNN以从非零基地址开始。
为每行包括一个右侧的十六进制校验和，以及一个最终的页脚（例如，总字节数）。

替代方案和边缘情况

大文件：代码以固定大小的块流式传输并组装行；调整缓冲区大小以匹配你的I/O环境。
非ASCII编码：ASCII边栏故意做得很粗糙。要实现UTF-8感知，你需要一个更仔细的渲染器；参见unicode.zig。
二进制管道：当前实现在未提供路径时会报错并退出；如果你想支持管道，可以在未提供路径时改为从stdin读取，并相应地调整打开/循环逻辑。