diff --git a/src/loader.zig b/src/loader.zig index f101d06..71be4fe 100644 --- a/src/loader.zig +++ b/src/loader.zig @@ -509,20 +509,14 @@ fn normalizeDateTimeToIso(col_type: ColumnType, val: []const u8, buf: *[19]u8) [ return buf[0..19]; } -/// fmtThousands(buf, n) → []const u8 -/// Pre: buf.len >= 26 (accommodates any usize value with thousands separators) -/// Post: n is formatted as a decimal string with ',' separating each group of -/// three digits from the right (e.g. 42317 → "42,317", 1000 → "1,000") +/// Format n with thousands separators (e.g. 42317 → "42,317"). +/// buf must hold at least 26 bytes. pub fn fmtThousands(buf: []u8, n: usize) []const u8 { - var tmp: [32]u8 = undefined; // 20 digits max (u64) + safety margin + var tmp: [32]u8 = undefined; const digits = std.fmt.bufPrint(&tmp, "{d}", .{n}) catch unreachable; - const len = digits.len; - const first_group = len % 3; // digits in the leading group (0 means groups of 3 from start) var out_len: usize = 0; for (digits, 0..) |ch, i| { - if ((i > 0 and i == first_group) or - (i > first_group and (i - first_group) % 3 == 0)) - { + if (i > 0 and (digits.len - i) % 3 == 0) { buf[out_len] = ','; out_len += 1; } diff --git a/src/markdown.zig b/src/markdown.zig index 894ae3d..c636b4d 100644 --- a/src/markdown.zig +++ b/src/markdown.zig @@ -10,6 +10,7 @@ const std = @import("std"); const c = @import("c"); const sqlite_mod = @import("sqlite.zig"); +const visual = @import("visual.zig"); /// Write a Markdown table from SQLite query results to the given writer. /// @@ -40,7 +41,7 @@ pub fn writeMarkdown( // 2. Pass 1: Compute column widths and detect numeric columns const widths = try a.alloc(usize, ncols); for (0..ncols) |i| { - widths[i] = visualWidth(col_names[i]); + widths[i] = visual.visualWidth(col_names[i]); } const numeric = try a.alloc(bool, ncols); @memset(numeric, true); @@ -64,7 +65,7 @@ pub fn writeMarkdown( const ptr = c.sqlite3_column_text(stmt, idx); if (ptr != null) { const s = std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); - const vw = visualWidth(s); + const vw = visual.visualWidth(s); if (vw > widths[i]) widths[i] = vw; } } @@ -108,14 +109,14 @@ fn writeRow( for (values, 0..) |val, i| { try writer.writeByte(' '); const w = widths[i]; - const vw = visualWidth(val); + const vw = visual.visualWidth(val); const padding = w - vw; if (right_align) { - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); try writer.writeAll(val); } else { try writer.writeAll(val); - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); } try writer.writeByte(' '); try writer.writeByte('|'); @@ -134,7 +135,7 @@ fn writeSeparator( try writer.writeByte('|'); for (widths) |w| { try writer.writeByte(' '); - try writeCharRepeated(writer, "-", w); + try visual.writeCharRepeated(writer, "-", w); try writer.writeByte(' '); try writer.writeByte('|'); } @@ -157,23 +158,23 @@ fn writeDataRow( if (c.sqlite3_column_type(stmt, idx) == c.SQLITE_NULL) { // NULL renders as empty cell if (numeric[i]) { - try writeSpaces(writer, w); + try visual.writeSpaces(writer, w); } else { - try writeSpaces(writer, w); + try visual.writeSpaces(writer, w); } } else { if (sqlite_mod.columnText(stmt, idx)) |val| { - const vw = visualWidth(val); + const vw = visual.visualWidth(val); const padding = w - vw; if (numeric[i] and val.len > 0) { - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); try writer.writeAll(val); } else { try writer.writeAll(val); - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); } } else { - try writeSpaces(writer, w); + try visual.writeSpaces(writer, w); } } try writer.writeByte(' '); @@ -182,86 +183,6 @@ fn writeDataRow( try writer.writeByte('\n'); } -// ── UTF-8 / visual-width helpers (copied from table.zig) ────────────────── - -fn utf8CharLen(first: u8) usize { - if (first < 0x80) return 1; - if (first < 0xC0) return 1; - if (first < 0xE0) return 2; - if (first < 0xF0) return 3; - if (first < 0xF8) return 4; - return 1; -} - -fn utf8DecodeRaw(bytes: []const u8) ?u21 { - return switch (bytes.len) { - 1 => bytes[0], - 2 => std.unicode.utf8Decode2(bytes[0..2].*) catch null, - 3 => std.unicode.utf8Decode3(bytes[0..3].*) catch null, - 4 => std.unicode.utf8Decode4(bytes[0..4].*) catch null, - else => null, - }; -} - -fn isWideCodepoint(cp: u21) bool { - return (cp >= 0x3400 and cp <= 0x4DBF) or - (cp >= 0x4E00 and cp <= 0x9FFF) or - (cp >= 0xAC00 and cp <= 0xD7AF) or - (cp >= 0xFF00 and cp <= 0xFFEF); -} - -fn visualWidth(s: []const u8) usize { - var width: usize = 0; - var i: usize = 0; - while (i < s.len) { - const byte_len = utf8CharLen(s[i]); - if (i + byte_len > s.len) { - width += 1; - i += 1; - continue; - } - const slice = s[i..][0..byte_len]; - const codepoint = utf8DecodeRaw(slice) orelse { - width += 1; - i += 1; - continue; - }; - if (isWideCodepoint(codepoint)) { - width += 2; - } else { - width += 1; - } - i += byte_len; - } - return width; -} - -fn writeCharRepeated(writer: *std.Io.Writer, char: []const u8, n: usize) error{WriteFailed}!void { - var buf: [256]u8 = undefined; - const char_len = char.len; - var filled: usize = 0; - while (filled + char_len <= buf.len) : (filled += char_len) { - @memcpy(buf[filled..][0..char_len], char); - } - var remaining = n; - while (remaining > 0) { - const chunk = @min(remaining, filled / char_len); - try writer.writeAll(buf[0..chunk * char_len]); - remaining -= chunk; - } -} - -const spaces_buf = " " ** 256; - -fn writeSpaces(writer: *std.Io.Writer, n: usize) error{WriteFailed}!void { - var remaining = n; - while (remaining > 0) { - const chunk = @min(remaining, spaces_buf.len); - try writer.writeAll(spaces_buf[0..chunk]); - remaining -= chunk; - } -} - test "writeMarkdown parameter order" { try std.testing.expect(true); } diff --git a/src/modes/columns.zig b/src/modes/columns.zig index bafd02d..6791d25 100644 --- a/src/modes/columns.zig +++ b/src/modes/columns.zig @@ -12,6 +12,7 @@ const inference_buffer_size = loader.inference_buffer_size; const ExitCode = args_mod.ExitCode; const fatal = @import("../sqlite.zig").fatal; const readAllInput = @import("../sqlite.zig").readAllInput; +const source = @import("source.zig"); pub fn runColumns( allocator: std.mem.Allocator, @@ -30,13 +31,9 @@ pub fn runColumns( .csv, .tsv => { const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &source_reader.interface, col_delim); const header_record = csv_reader.nextRecord() catch |err| switch (err) { @@ -98,13 +95,9 @@ pub fn runColumns( }, .json => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); const input = readAllInput(&source_reader.interface, allocator, stderr_writer, "JSON input"); defer allocator.free(input); @@ -132,13 +125,9 @@ pub fn runColumns( }, .ndjson => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); // Read until we find a non-empty line var line_num: usize = 0; @@ -182,13 +171,9 @@ pub fn runColumns( }, .xml => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); const names = xml_mod.getXmlColumnNames(allocator, &source_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); defer { diff --git a/src/modes/sample.zig b/src/modes/sample.zig index 2ef063e..44aefbb 100644 --- a/src/modes/sample.zig +++ b/src/modes/sample.zig @@ -12,6 +12,7 @@ const inference_buffer_size = loader.inference_buffer_size; const ExitCode = args_mod.ExitCode; const fatal = @import("../sqlite.zig").fatal; +const source = @import("source.zig"); pub fn runSample( allocator: std.mem.Allocator, @@ -36,13 +37,9 @@ pub fn runSample( .csv, .tsv => { const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &source_reader.interface, col_delim); const header_record = csv_reader.nextRecord() catch |err| switch (err) { diff --git a/src/modes/source.zig b/src/modes/source.zig new file mode 100644 index 0000000..ae2aa93 --- /dev/null +++ b/src/modes/source.zig @@ -0,0 +1,30 @@ +//! Shared helpers for opening input sources in mode commands. +//! +//! Provides `SourceFile` — a file handle with a `needs_close` flag, +//! so callers can uniformly handle file-or-stdin sources. + +const std = @import("std"); +const fatal = @import("../sqlite.zig").fatal; + +/// Result of opening a file-or-stdin input source. +pub const SourceFile = struct { + file: std.Io.File, + needs_close: bool, + + /// Close the file if it was opened from a path (no-op for stdin). + pub fn deinit(self: SourceFile, io: std.Io) void { + if (self.needs_close) std.Io.File.close(self.file, io); + } +}; + +/// Open a file or stdin, handling errors uniformly. +/// +/// `input_source` must be a tagged union with `.file: []const u8` and `.stdin` variants. +pub fn openInput(input_source: anytype, io: std.Io, stderr_writer: *std.Io.Writer) SourceFile { + const source_file = switch (input_source) { + .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| + fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), + .stdin => std.Io.File.stdin(), + }; + return .{ .file = source_file, .needs_close = input_source == .file }; +} diff --git a/src/modes/validate.zig b/src/modes/validate.zig index 26d0702..b798579 100644 --- a/src/modes/validate.zig +++ b/src/modes/validate.zig @@ -15,6 +15,7 @@ const inference_buffer_size = loader.inference_buffer_size; const ExitCode = args_mod.ExitCode; const fatal = @import("../sqlite.zig").fatal; const readAllInput = @import("../sqlite.zig").readAllInput; +const source = @import("source.zig"); pub fn runValidate( allocator: std.mem.Allocator, @@ -33,13 +34,9 @@ pub fn runValidate( .csv, .tsv => { const col_delim: []const u8 = if (args.input_format == .tsv) "\t" else args.delimiter; var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); var csv_reader = csv_mod.csvReaderWithDelimiter(allocator, &source_reader.interface, col_delim); const header_record = csv_reader.nextRecord() catch |err| switch (err) { @@ -154,13 +151,9 @@ pub fn runValidate( }, .json => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); const input = readAllInput(&source_reader.interface, allocator, stderr_writer, "JSON input"); defer allocator.free(input); @@ -204,13 +197,9 @@ pub fn runValidate( }, .ndjson => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); var line_num: usize = 0; var row_count: usize = 0; @@ -290,13 +279,9 @@ pub fn runValidate( }, .xml => { var read_buf: [4096]u8 = undefined; - const source_file = switch (input_source) { - .file => |path| std.Io.Dir.openFile(std.Io.Dir.cwd(), io, path, .{}) catch |err| - fatal("cannot open file '{s}': {s}", stderr_writer, .csv_error, .{ path, @errorName(err) }), - .stdin => std.Io.File.stdin(), - }; - defer if (input_source == .file) std.Io.File.close(source_file, io); - var source_reader = std.Io.File.reader(source_file, io, &read_buf); + const opened = source.openInput(input_source, io, stderr_writer); + defer opened.deinit(io); + var source_reader = std.Io.File.reader(opened.file, io, &read_buf); const summary = xml_mod.summarizeXml(allocator, &source_reader.interface, args.xml_root_input, args.xml_row_input, stderr_writer); defer { diff --git a/src/table.zig b/src/table.zig index a6e57dc..563fc78 100644 --- a/src/table.zig +++ b/src/table.zig @@ -11,6 +11,7 @@ const std = @import("std"); const c = @import("c"); const sqlite_mod = @import("sqlite.zig"); +const visual = @import("visual.zig"); /// Write a formatted table from SQLite query results to the given writer. /// @@ -43,7 +44,7 @@ pub fn writeTable( const widths = try a.alloc(usize, ncols); // Initialize with column name visual widths for (0..ncols) |i| { - widths[i] = visualWidth(col_names[i]); + widths[i] = visual.visualWidth(col_names[i]); } const numeric = try a.alloc(bool, ncols); @memset(numeric, true); @@ -68,7 +69,7 @@ pub fn writeTable( const ptr = c.sqlite3_column_text(stmt, idx); if (ptr != null) { const s = std.mem.span(@as([*:0]const u8, @ptrCast(ptr))); - const vw = visualWidth(s); + const vw = visual.visualWidth(s); if (vw > widths[i]) widths[i] = vw; } else { // Non-NULL type but null text pointer (shouldn't happen, but handle gracefully) @@ -140,7 +141,7 @@ fn writeBorder( try writer.writeAll(left); for (widths, 0..) |w, i| { // Each column segment: ─ repeated (w + 2) times - try writeCharRepeated(writer, "─", w + 2); + try visual.writeCharRepeated(writer, "─", w + 2); if (i < widths.len - 1) { try writer.writeAll(cross); } @@ -159,10 +160,10 @@ fn writeHeaderRow( for (values, 0..) |val, i| { try writer.writeByte(' '); const w = widths[i]; - const vw = visualWidth(val); + const vw = visual.visualWidth(val); const padding = w - vw; try writer.writeAll(val); - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); try writer.writeByte(' '); try writer.writeAll("│"); } @@ -186,26 +187,26 @@ fn writeDataRow( // Show NULL text distinct from empty string const null_text = "NULL"; if (numeric[i]) { - try writeSpaces(writer, w - null_text.len); + try visual.writeSpaces(writer, w - null_text.len); try writer.writeAll(null_text); } else { try writer.writeAll(null_text); - try writeSpaces(writer, w - null_text.len); + try visual.writeSpaces(writer, w - null_text.len); } } else { if (sqlite_mod.columnText(stmt, idx)) |val| { - const vw = visualWidth(val); + const vw = visual.visualWidth(val); const padding = w - vw; if (numeric[i] and val.len > 0) { - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); try writer.writeAll(val); } else { try writer.writeAll(val); - try writeSpaces(writer, padding); + try visual.writeSpaces(writer, padding); } } else { // Shouldn't happen for non-NULL types, but handle empty - try writeSpaces(writer, w); + try visual.writeSpaces(writer, w); } } try writer.writeByte(' '); @@ -214,100 +215,6 @@ fn writeDataRow( try writer.writeByte('\n'); } -/// Return the byte length of a UTF-8 character from its leading byte. -fn utf8CharLen(first: u8) usize { - if (first < 0x80) return 1; - if (first < 0xC0) return 1; // continuation or invalid byte — treat as single - if (first < 0xE0) return 2; - if (first < 0xF0) return 3; - if (first < 0xF8) return 4; - return 1; // invalid byte -} - -/// Decode a raw UTF-8 sequence (1–4 bytes) into a codepoint, or null on error. -fn utf8DecodeRaw(bytes: []const u8) ?u21 { - return switch (bytes.len) { - 1 => bytes[0], - 2 => std.unicode.utf8Decode2(bytes[0..2].*) catch null, - 3 => std.unicode.utf8Decode3(bytes[0..3].*) catch null, - 4 => std.unicode.utf8Decode4(bytes[0..4].*) catch null, - else => null, - }; -} - -/// Check whether a codepoint is wide (display width 2 in a terminal). -fn isWideCodepoint(cp: u21) bool { - return (cp >= 0x3400 and cp <= 0x4DBF) or - (cp >= 0x4E00 and cp <= 0x9FFF) or - (cp >= 0xAC00 and cp <= 0xD7AF) or - (cp >= 0xFF00 and cp <= 0xFFEF); -} - -/// Compute the visual display width of a UTF-8 string. -/// -/// Returns the number of terminal columns the string occupies: -/// - ASCII (0x00–0x7F): width 1 -/// - CJK Unified Ideographs (0x4E00–0x9FFF): width 2 -/// - CJK Extension A (0x3400–0x4DBF): width 2 -/// - Fullwidth Forms (0xFF00–0xFFEF): width 2 -/// - Hangul Syllables (0xAC00–0xD7AF): width 2 -/// - Everything else: width 1 (conservative estimate) -/// -/// On decode errors, advances one byte and assumes width 1. -fn visualWidth(s: []const u8) usize { - var width: usize = 0; - var i: usize = 0; - while (i < s.len) { - const byte_len = utf8CharLen(s[i]); - if (i + byte_len > s.len) { - width += 1; - i += 1; - continue; - } - const slice = s[i..][0..byte_len]; - const codepoint = utf8DecodeRaw(slice) orelse { - width += 1; - i += 1; - continue; - }; - if (isWideCodepoint(codepoint)) { - width += 2; - } else { - width += 1; - } - i += byte_len; - } - return width; -} - -/// Helper: write a multi-byte UTF-8 character repeated n times. -fn writeCharRepeated(writer: *std.Io.Writer, char: []const u8, n: usize) error{WriteFailed}!void { - var buf: [256]u8 = undefined; - const char_len = char.len; - var filled: usize = 0; - while (filled + char_len <= buf.len) : (filled += char_len) { - @memcpy(buf[filled..][0..char_len], char); - } - var remaining = n; - while (remaining > 0) { - const chunk = @min(remaining, filled / char_len); - try writer.writeAll(buf[0..chunk * char_len]); - remaining -= chunk; - } -} - -const spaces_buf = " " ** 256; - -/// Write n space characters efficiently using a pre-filled buffer. -fn writeSpaces(writer: *std.Io.Writer, n: usize) error{WriteFailed}!void { - var remaining = n; - while (remaining > 0) { - const chunk = @min(remaining, spaces_buf.len); - try writer.writeAll(spaces_buf[0..chunk]); - remaining -= chunk; - } -} - /// Check if a string represents a numeric value (integer or floating-point). /// Handles optional leading sign, decimal point, and scientific notation. fn isNumericString(s: []const u8) bool { @@ -369,37 +276,6 @@ test "isNumericString" { try t.expect(!isNumericString("")); } -test "visualWidth ASCII" { - const t = std.testing; - try t.expectEqual(@as(usize, 0), visualWidth("")); - try t.expectEqual(@as(usize, 5), visualWidth("Hello")); - try t.expectEqual(@as(usize, 3), visualWidth("abc")); -} - -test "visualWidth CJK" { - const t = std.testing; - // Each CJK character has width 2 - try t.expectEqual(@as(usize, 6), visualWidth("你好世界")); // 3 chars x 2 = 6 - try t.expectEqual(@as(usize, 2), visualWidth("中")); - try t.expectEqual(@as(usize, 4), visualWidth("中文")); -} - -test "visualWidth mixed" { - const t = std.testing; - // "Hello" (5) + "世界" (4) = 9 - try t.expectEqual(@as(usize, 9), visualWidth("Hello世界")); - // "a" (1) + "中" (2) + "b" (1) = 4 - try t.expectEqual(@as(usize, 4), visualWidth("a中b")); -} - -test "visualWidth invalid UTF-8" { - const t = std.testing; - // Invalid continuation byte treated as width 1 - try t.expectEqual(@as(usize, 1), visualWidth(&[_]u8{0x80})); - // Overlong encoding (invalid) — width 1 per byte - try t.expectEqual(@as(usize, 1), visualWidth(&[_]u8{0xC0, 0x80})); -} - test "writeTable parameter order" { // Verify the public API compiles with the correct parameter order: // writeTable(allocator, writer, stmt, col_count) diff --git a/src/visual.zig b/src/visual.zig new file mode 100644 index 0000000..186c55b --- /dev/null +++ b/src/visual.zig @@ -0,0 +1,131 @@ +//! UTF-8 display-width helpers shared by table.zig and markdown.zig. +//! +//! Provides: +//! utf8CharLen, utf8DecodeRaw, isWideCodepoint — low-level UTF-8 inspection +//! visualWidth — terminal display width of a string (CJK = 2, ASCII = 1) +//! writeCharRepeated, writeSpaces — efficient repeated-string output + +const std = @import("std"); + +/// Return the byte length of a UTF-8 character from its leading byte. +fn utf8CharLen(first: u8) usize { + if (first < 0x80) return 1; + if (first < 0xC0) return 1; // continuation or invalid byte — treat as single + if (first < 0xE0) return 2; + if (first < 0xF0) return 3; + if (first < 0xF8) return 4; + return 1; // invalid byte +} + +/// Decode a raw UTF-8 sequence (1–4 bytes) into a codepoint, or null on error. +fn utf8DecodeRaw(bytes: []const u8) ?u21 { + return switch (bytes.len) { + 1 => bytes[0], + 2 => std.unicode.utf8Decode2(bytes[0..2].*) catch null, + 3 => std.unicode.utf8Decode3(bytes[0..3].*) catch null, + 4 => std.unicode.utf8Decode4(bytes[0..4].*) catch null, + else => null, + }; +} + +/// Check whether a codepoint is wide (display width 2 in a terminal). +fn isWideCodepoint(cp: u21) bool { + return (cp >= 0x3400 and cp <= 0x4DBF) or + (cp >= 0x4E00 and cp <= 0x9FFF) or + (cp >= 0xAC00 and cp <= 0xD7AF) or + (cp >= 0xFF00 and cp <= 0xFFEF); +} + +/// Compute the visual display width of a UTF-8 string. +/// +/// Returns the number of terminal columns the string occupies: +/// - ASCII (0x00–0x7F): width 1 +/// - CJK Unified Ideographs (0x4E00–0x9FFF): width 2 +/// - CJK Extension A (0x3400–0x4DBF): width 2 +/// - Fullwidth Forms (0xFF00–0xFFEF): width 2 +/// - Hangul Syllables (0xAC00–0xD7AF): width 2 +/// - Everything else: width 1 (conservative estimate) +/// +/// On decode errors, advances one byte and assumes width 1. +pub fn visualWidth(s: []const u8) usize { + var width: usize = 0; + var i: usize = 0; + while (i < s.len) { + const byte_len = utf8CharLen(s[i]); + if (i + byte_len > s.len) { + width += 1; + i += 1; + continue; + } + const slice = s[i..][0..byte_len]; + const codepoint = utf8DecodeRaw(slice) orelse { + width += 1; + i += 1; + continue; + }; + if (isWideCodepoint(codepoint)) { + width += 2; + } else { + width += 1; + } + i += byte_len; + } + return width; +} + +/// Helper: write a multi-byte UTF-8 character repeated n times. +pub fn writeCharRepeated(writer: *std.Io.Writer, char: []const u8, n: usize) error{WriteFailed}!void { + var buf: [256]u8 = undefined; + const char_len = char.len; + var filled: usize = 0; + while (filled + char_len <= buf.len) : (filled += char_len) { + @memcpy(buf[filled..][0..char_len], char); + } + var remaining = n; + while (remaining > 0) { + const chunk = @min(remaining, filled / char_len); + try writer.writeAll(buf[0..chunk * char_len]); + remaining -= chunk; + } +} + +const spaces_buf = " " ** 256; + +/// Write n space characters efficiently using a pre-filled buffer. +pub fn writeSpaces(writer: *std.Io.Writer, n: usize) error{WriteFailed}!void { + var remaining = n; + while (remaining > 0) { + const chunk = @min(remaining, spaces_buf.len); + try writer.writeAll(spaces_buf[0..chunk]); + remaining -= chunk; + } +} + +const testing = std.testing; + +test "visualWidth ASCII" { + try testing.expectEqual(@as(usize, 0), visualWidth("")); + try testing.expectEqual(@as(usize, 5), visualWidth("Hello")); + try testing.expectEqual(@as(usize, 3), visualWidth("abc")); +} + +test "visualWidth CJK" { + // Each CJK character has width 2 + try testing.expectEqual(@as(usize, 6), visualWidth("你好世界")); + try testing.expectEqual(@as(usize, 2), visualWidth("中")); + try testing.expectEqual(@as(usize, 4), visualWidth("中文")); +} + +test "visualWidth mixed" { + // "Hello" (5) + "世界" (4) = 9 + try testing.expectEqual(@as(usize, 9), visualWidth("Hello世界")); + // "a" (1) + "中" (2) + "b" (1) = 4 + try testing.expectEqual(@as(usize, 4), visualWidth("a中b")); +} + +test "visualWidth invalid UTF-8" { + // Invalid continuation byte treated as width 1 + try testing.expectEqual(@as(usize, 1), visualWidth(&[_]u8{0x80})); + // Overlong encoding (invalid) — width 1 per byte + try testing.expectEqual(@as(usize, 1), visualWidth(&[_]u8{0xC0, 0x80})); +}