From abbcd745f3d22413a631a7f67fd448f8716d2aed Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Fri, 7 Mar 2025 18:11:05 +1100 Subject: [PATCH 01/40] WIP malformed node in header --- src/check/parse/IR.zig | 15 +++++++++++++++ src/coordinate.zig | 3 +++ src/fmt.zig | 4 ++++ src/snapshot.zig | 5 +++++ src/snapshots/003.txt | 11 +++++++++-- 5 files changed, 36 insertions(+), 2 deletions(-) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 9b902500d3..951c65e643 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -1064,6 +1064,9 @@ pub const NodeStore = struct { .region = emptyRegion(), } }; }, + .malformed => { + return .malformed; + }, else => { std.debug.panic("Expected a valid header tag, got {s}", .{@tagName(node.tag)}); }, @@ -1599,6 +1602,7 @@ pub const NodeStore = struct { // TODO: complete this region: Region, }, + malformed, const AppHeaderRhs = packed struct { num_packages: u10, num_provides: u22 }; @@ -1613,6 +1617,9 @@ pub const NodeStore = struct { return header_node; }, + .malformed => { + return sexpr.Expr.init(env.gpa, "malformed"); + }, else => @panic("not implemented"), } } @@ -1941,6 +1948,14 @@ pub const NodeStore = struct { ident_sexpr.appendStringChild(env.gpa, ir.resolve(ident.token)); return ident_sexpr; }, + .list => |a| { + var node = sexpr.Expr.init(env.gpa, "list"); + for (ir.store.exprSlice(a.items)) |b| { + var child = ir.store.getExpr(b).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &child); + } + return node; + }, else => { std.debug.print("Format for Expr {}", .{self}); @panic("not implemented yet"); diff --git a/src/coordinate.zig b/src/coordinate.zig index 300a2b89c1..89d9881ac4 100644 --- a/src/coordinate.zig +++ b/src/coordinate.zig @@ -506,6 +506,9 @@ fn parseDependenciesFromPackageRoot( .start = 0, .len = 0, } }, + .malformed => { + @panic("TODO -- what should we do for a malformed node here?? 
raise a compiler problem I assume"); + }, }; for (parse_ast.store.recordFieldSlice(package_list)) |package_import| { diff --git a/src/fmt.zig b/src/fmt.zig index 92f1eb7e1b..40ed813eb4 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -435,6 +435,10 @@ fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { fmt.push(']'); fmt.newline(); }, + .malformed => { + fmt.pushAll(""); + fmt.newline(); + }, else => { std.debug.panic("TODO: Handle formatting {s}", .{@tagName(header)}); }, diff --git a/src/snapshot.zig b/src/snapshot.zig index 7a1a9ca147..fd1a849b64 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -304,6 +304,11 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor var parse_ast = parse.parse(&module_env, content.source); defer parse_ast.deinit(); + if (parse_ast.errors.len > 0) { + warn("file {s}: contained {d} errors, skipping", .{ snapshot_path, parse_ast.errors.len }); + return false; + } + // Format the source code var formatter = fmt.init(parse_ast); defer formatter.deinit(); diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index ba00c02ae4..06a79161f8 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -1,14 +1,21 @@ ~~~META description=Import statement ~~~SOURCE -module [decoder] + + +modAle + +[decoder] import json.Json decoder = Utf8.decode ~~~PARSE (file - (header 'decoder') + (malformed) + (ident '' 'modAle') + (list + (ident '' 'decoder')) (import 'json' '.Json' From d9702308b0e0b7f18b424ba78813e683e0f4cceb Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Sun, 9 Mar 2025 16:41:03 +1100 Subject: [PATCH 02/40] fix memory leak, add tag to malformed node --- src/check/parse/IR.zig | 24 ++++++++++++++++++++---- src/coordinate.zig | 4 +++- src/fmt.zig | 7 +++++-- src/snapshot.zig | 2 +- src/snapshots/003.txt | 3 +-- 5 files changed, 30 insertions(+), 10 deletions(-) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 951c65e643..3d197cdf1d 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -23,6 +23,7 @@ errors: []const Diagnostic, pub fn deinit(self: *IR) void { defer self.tokens.deinit(); defer self.store.deinit(); + self.store.gpa.free(self.errors); } /// Diagnostics related to parsing @@ -1065,7 +1066,18 @@ pub const NodeStore = struct { } }; }, .malformed => { - return .malformed; + // TODO -- what should we do here? 
+ const reason: Diagnostic.Tag = @enumFromInt(node.data.lhs); + // switch (reason) { + // .missing_header => { + // // std.debug.print("MISSING HEADER: {}\n", .{reason}); + // return .malformed; + // }, + // else => { + // @panic("ASDFASDF"); + // }, + // } + return .{ .malformed = .{ .reason = reason } }; }, else => { std.debug.panic("Expected a valid header tag, got {s}", .{@tagName(node.tag)}); @@ -1602,7 +1614,9 @@ pub const NodeStore = struct { // TODO: complete this region: Region, }, - malformed, + malformed: struct { + reason: Diagnostic.Tag, + }, const AppHeaderRhs = packed struct { num_packages: u10, num_provides: u22 }; @@ -1617,8 +1631,10 @@ pub const NodeStore = struct { return header_node; }, - .malformed => { - return sexpr.Expr.init(env.gpa, "malformed"); + .malformed => |a| { + var node = sexpr.Expr.init(env.gpa, "malformed"); + node.appendStringChild(env.gpa, @tagName(a.reason)); + return node; }, else => @panic("not implemented"), } diff --git a/src/coordinate.zig b/src/coordinate.zig index 89d9881ac4..e6d4dc07b9 100644 --- a/src/coordinate.zig +++ b/src/coordinate.zig @@ -462,6 +462,7 @@ fn findRootOfPackage( const ParsePackageDepsErr = union(enum) { failed_to_canonicalize_root_file: Filesystem.CanonicalizeError, failed_to_read_root_file: Filesystem.ReadError, + malformed_header, }; fn parseDependenciesFromPackageRoot( @@ -507,7 +508,8 @@ fn parseDependenciesFromPackageRoot( .len = 0, } }, .malformed => { - @panic("TODO -- what should we do for a malformed node here?? raise a compiler problem I assume"); + // @Sam -- does this look right? + return ParsePackageDepsErr.malformed_header; }, }; diff --git a/src/fmt.zig b/src/fmt.zig index 40ed813eb4..866bf2e24e 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -436,8 +436,11 @@ fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { fmt.newline(); }, .malformed => { - fmt.pushAll(""); - fmt.newline(); + // TODO what should we do here? 
+ // const node = fmt.ast.store.nodes.get(@enumFromInt(hi.id)); + // const source = fmt.ast.resolve(node.main_token); + // std.debug.print("MALFORMED SOURCE {s}\n", .{source}); + // fmt.pushAll(source); }, else => { std.debug.panic("TODO: Handle formatting {s}", .{@tagName(header)}); diff --git a/src/snapshot.zig b/src/snapshot.zig index fd1a849b64..e7a0885a23 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -305,7 +305,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor defer parse_ast.deinit(); if (parse_ast.errors.len > 0) { - warn("file {s}: contained {d} errors, skipping", .{ snapshot_path, parse_ast.errors.len }); + warn("skipping file {s} as it contains {d} errors", .{ snapshot_path, parse_ast.errors.len }); return false; } diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index 06a79161f8..d5a78d4051 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -1,7 +1,6 @@ ~~~META description=Import statement ~~~SOURCE - modAle @@ -12,7 +11,7 @@ import json.Json decoder = Utf8.decode ~~~PARSE (file - (malformed) + (malformed 'missing_header') (ident '' 'modAle') (list (ident '' 'decoder')) From b103e0bd8d74c3c4606044ff92ae40ed3a69f1db Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Sun, 9 Mar 2025 20:27:21 +1100 Subject: [PATCH 03/40] add error handling for header issues --- src/check/parse/IR.zig | 4 ++++ src/check/parse/Parser.zig | 28 +++++++++++++++++++---- src/snapshots/header_unexpected_token.txt | 4 ++++ 3 files changed, 32 insertions(+), 4 deletions(-) create mode 100644 src/snapshots/header_unexpected_token.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 3d197cdf1d..3ebf66964c 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -49,6 +49,10 @@ pub const Diagnostic = struct { expected_platform_string, expected_package_or_platform_string, expected_package_platform_close_curly, + expect_closing_paren, + header_expected_open_bracket, + header_unexpected_token, + header_expected_close_bracket, }; }; diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 1efe44ea09..2be8b31542 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -155,7 +155,8 @@ pub fn parseFile(self: *Parser) void { } } - std.debug.assert(self.store.scratch_statements.items.len > 0); + // TODO: fix me, blows up on empty input + // std.debug.assert(self.store.scratch_statements.items.len > 0); _ = self.store.addFile(.{ .header = header, @@ -221,13 +222,25 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { // Get exposes if (self.peek() != .OpenSquare) { - std.debug.panic("TODO: Handle header with no exposes open bracket: {s}", .{@tagName(self.peek())}); + // std.debug.panic("TODO: Handle header with no exposes open bracket: {s}", .{@tagName(self.peek())}); + const reason: IR.Diagnostic.Tag = .header_expected_open_bracket; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addHeader(.{ .malformed = .{ .reason = reason } }); } self.advance(); const scratch_top = self.store.scratchTokenTop(); while (self.peek() != .CloseSquare) { if (self.peek() != .LowerIdent and self.peek() != .UpperIdent) { - std.debug.panic("TODO: Handler header bad exposes contents: {s}", .{@tagName(self.peek())}); + // std.debug.panic("TODO: Handler header bad exposes contents: {s}", .{@tagName(self.peek())}); + const reason: IR.Diagnostic.Tag = .header_unexpected_token; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + 
}); + return self.store.addHeader(.{ .malformed = .{ .reason = reason } }); } self.store.addScratchToken(self.pos); @@ -239,8 +252,15 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { self.advance(); } if (self.peek() != .CloseSquare) { - std.debug.panic("TODO: Handle Bad header no closing exposes bracket: {s}", .{@tagName(self.peek())}); + // std.debug.panic("TODO: Handle Bad header no closing exposes bracket: {s}", .{@tagName(self.peek())}); + const reason: IR.Diagnostic.Tag = .header_expected_close_bracket; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addHeader(.{ .malformed = .{ .reason = reason } }); } + const exposes = self.store.tokenSpanFrom(scratch_top); self.advance(); diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt new file mode 100644 index 0000000000..3511c5907c --- /dev/null +++ b/src/snapshots/header_unexpected_token.txt @@ -0,0 +1,4 @@ +~~~META +description= +~~~SOURCE +module[% From 9d2c81172542213d83d28d4590b7ac507a756ce8 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Sun, 9 Mar 2025 20:57:41 +1100 Subject: [PATCH 04/40] add parse errors to snapshots --- src/check/parse/IR.zig | 14 +++++ src/snapshot.zig | 64 ++++++++++++++--------- src/snapshots/003.txt | 12 ++--- src/snapshots/header_unexpected_token.txt | 3 ++ src/snapshots/some_folder/002.txt | 6 +++ 5 files changed, 68 insertions(+), 31 deletions(-) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 3ebf66964c..730a24334c 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -54,6 +54,20 @@ pub const Diagnostic = struct { header_unexpected_token, header_expected_close_bracket, }; + + // TODO this is a hack just to get something in the snapshots... + pub fn not_terrible_error(self: Diagnostic, source: []const u8, writer: anytype) !void { + + // this is definitely not right... are these token indexes or source bytes? 
+ const start: u32 = self.region.start; + const end: u32 = self.region.end; + const snippet = source[start..end]; + + try writer.writeAll("PARSE ERROR "); + try writer.writeAll(@tagName(self.tag)); + try writer.writeAll(snippet); + try writer.writeAll("\n"); + } }; /// The first and last token consumed by a Node diff --git a/src/snapshot.zig b/src/snapshot.zig index e7a0885a23..28e820c49a 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -149,18 +149,21 @@ const Section = union(enum) { source, formatted, parse, + parser_errors, pub const META = "~~~META"; pub const SOURCE = "~~~SOURCE"; pub const FORMATTED = "~~~FORMATTED"; pub const PARSE = "~~~PARSE"; + pub const PARSE_ERRORS = "~~~PARSE_ERRORS"; fn next(self: Section) ?Section { return switch (self) { .meta => .source, .source => .formatted, .formatted => .parse, - .parse => null, + .parse => .parser_errors, + .parser_errors => null, }; } @@ -169,6 +172,7 @@ const Section = union(enum) { if (std.mem.eql(u8, str, SOURCE)) return .source; if (std.mem.eql(u8, str, FORMATTED)) return .formatted; if (std.mem.eql(u8, str, PARSE)) return .parse; + if (std.mem.eql(u8, str, PARSE_ERRORS)) return .parser_errors; return null; } @@ -178,6 +182,7 @@ const Section = union(enum) { .source => SOURCE, .formatted => FORMATTED, .parse => PARSE, + .parser_errors => PARSE_ERRORS, .None => "", }; } @@ -215,10 +220,6 @@ const Content = struct { }; } - fn has_formatted_section(self: Content) bool { - return self.formatted != null; - } - fn from_ranges(ranges: std.AutoHashMap(Section, Section.Range), content: []const u8) Error!Content { var meta: []const u8 = undefined; var source: []const u8 = undefined; @@ -281,7 +282,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor } // Parse the file to find section boundaries - const content = extractSections(gpa, file_content) catch |err| { + var content = extractSections(gpa, file_content) catch |err| { switch (err) { Error.MissingSnapshotHeader, Error.MissingSnapshotSource => { warn("ignoring file {s}: {s}", .{ snapshot_path, @errorName(err) }); @@ -304,22 +305,25 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor var parse_ast = parse.parse(&module_env, content.source); defer parse_ast.deinit(); - if (parse_ast.errors.len > 0) { - warn("skipping file {s} as it contains {d} errors", .{ snapshot_path, parse_ast.errors.len }); - return false; - } - - // Format the source code - var formatter = fmt.init(parse_ast); - defer formatter.deinit(); - const formatted_output = formatter.formatFile(); - defer gpa.free(formatted_output); - // shouldn't be required in future parse_ast.store.emptyScratch(); - // Write the new AST to the parse section - try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); + const has_parse_errors = parse_ast.errors.len > 0; + // if (parse_ast.errors.len > 0) { + // warn("skipping file {s} as it contains {d} errors", .{ snapshot_path, parse_ast.errors.len }); + // return false; + // } + + if (!has_parse_errors) { + // Write the new AST to the parse section + try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); + + // Format the source code + var formatter = fmt.init(parse_ast); + defer formatter.deinit(); + content.formatted = formatter.formatFile(); + // defer gpa.free(formatted_output); + } // Rewrite the file with updated sections var file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { @@ -335,26 +339,36 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, 
maybe_fuzz_cor // If there's an explicit FORMATTED section, keep the source as-is // and update the FORMATTED section - if (content.has_formatted_section()) { + if (content.formatted != null) { try file.writer().writeAll(Section.SOURCE); try file.writer().writeAll("\n"); try file.writer().writeAll(content.source); try file.writer().writeAll("\n"); try file.writer().writeAll(Section.FORMATTED); try file.writer().writeAll("\n"); - try file.writer().writeAll(formatted_output); + try file.writer().writeAll(content.formatted.?); try file.writer().writeAll("\n"); + + gpa.free(content.formatted.?); } else { // Otherwise, update SOURCE directly with the formatted output try file.writer().writeAll(Section.SOURCE); try file.writer().writeAll("\n"); - try file.writer().writeAll(formatted_output); + try file.writer().writeAll(content.source); try file.writer().writeAll("\n"); } - try file.writer().writeAll(Section.PARSE); - try file.writer().writeAll("\n"); - try file.writer().writeAll(parse_buffer.items); + if (!has_parse_errors) { + try file.writer().writeAll(Section.PARSE); + try file.writer().writeAll("\n"); + try file.writer().writeAll(parse_buffer.items); + } else { + try file.writer().writeAll(Section.PARSE_ERRORS); + try file.writer().writeAll("\n"); + for (parse_ast.errors) |err| { + try err.not_terrible_error(content.source, file); + } + } // If flag --fuzz-corpus is passed, so write the SOURCE to our corpus if (maybe_fuzz_corpus_path != null) { diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index d5a78d4051..8aa72cae8f 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -1,20 +1,20 @@ ~~~META description=Import statement ~~~SOURCE +module [decoder] -modAle +import json.Json -[decoder] +decoder = Utf8.decode +~~~FORMATTED +module [decoder] import json.Json decoder = Utf8.decode ~~~PARSE (file - (malformed 'missing_header') - (ident '' 'modAle') - (list - (ident '' 'decoder')) + (header 'decoder') (import 'json' '.Json' diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt index 3511c5907c..38abdc3e0e 100644 --- a/src/snapshots/header_unexpected_token.txt +++ b/src/snapshots/header_unexpected_token.txt @@ -2,3 +2,6 @@ description= ~~~SOURCE module[% +~~~PARSE_ERRORS +PARSE ERROR header_unexpected_token +PARSE ERROR unexpected_token diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 9cdffe412d..128a258bbd 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -5,6 +5,12 @@ module [foo, bar] foo = "one" +bar = "two" +~~~FORMATTED +module [foo, bar] + +foo = "one" + bar = "two" ~~~PARSE (file From f6ae028c6e7f045ad6990b5a920c69dc3331a2fe Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 08:48:53 +1100 Subject: [PATCH 05/40] handle unexpected token in Pattern --- src/check/parse/IR.zig | 16 +++++++++++++++- src/check/parse/Parser.zig | 10 +++++++++- src/fmt.zig | 9 ++++----- src/snapshots/fuzz_crash_001.txt | 7 +++++++ 4 files changed, 35 insertions(+), 7 deletions(-) create mode 100644 src/snapshots/fuzz_crash_001.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 730a24334c..af983869ff 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -53,6 +53,7 @@ pub const Diagnostic = struct { header_expected_open_bracket, header_unexpected_token, header_expected_close_bracket, + pattern_unexpected_token, }; // TODO this is a hack just to get something in the snapshots... 
@@ -615,7 +616,9 @@ pub const NodeStore = struct { node.data.lhs = mod.exposes.span.start; node.data.rhs = mod.exposes.span.len; }, - else => {}, + else => { + // TODO -- should this be ignoring other header types?? + }, } const nid = store.nodes.append(store.gpa, node); return .{ .id = @intFromEnum(nid) }; @@ -748,6 +751,9 @@ pub const NodeStore = struct { node.data.lhs = a.patterns.span.start; node.data.rhs = a.patterns.span.len; }, + .malformed => { + node.tag = .malformed; + }, } const nid = store.nodes.append(store.gpa, node); return .{ .id = @intFromEnum(nid) }; @@ -1853,6 +1859,9 @@ pub const NodeStore = struct { patterns: PatternSpan, region: Region, }, + malformed: struct { + reason: Diagnostic.Tag, + }, pub fn toSExpr(self: @This(), env: *base.ModuleEnv, ir: *IR) sexpr.Expr { switch (self) { @@ -1863,6 +1872,11 @@ pub const NodeStore = struct { return node; }, + .malformed => |a| { + var node = sexpr.Expr.init(env.gpa, "malformed"); + node.appendStringChild(env.gpa, @tagName(a.reason)); + return node; + }, else => @panic("formatting for this pattern not yet implemented"), } } diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 2be8b31542..6f51fd140e 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -684,7 +684,15 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt .region = .{ .start = start, .end = self.pos }, } }); }, - else => std.debug.panic("TODO: Handle parsing pattern starting with: {s}", .{@tagName(self.peek())}), + else => { + // std.debug.panic("TODO: Handle parsing pattern starting with: {s}", .{@tagName(self.peek())}) + const reason: IR.Diagnostic.Tag = .pattern_unexpected_token; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addPattern(.{ .malformed = .{ .reason = reason } }); + }, } if (pattern) |p| { diff --git a/src/fmt.zig b/src/fmt.zig index 866bf2e24e..2050127632 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -382,6 +382,9 @@ fn formatPattern(fmt: *Formatter, pi: PatternIdx) void { i += 1; } }, + .malformed => { + // TODO how should we format a malformed here? + }, } } @@ -436,11 +439,7 @@ fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { fmt.newline(); }, .malformed => { - // TODO what should we do here? - // const node = fmt.ast.store.nodes.get(@enumFromInt(hi.id)); - // const source = fmt.ast.resolve(node.main_token); - // std.debug.print("MALFORMED SOURCE {s}\n", .{source}); - // fmt.pushAll(source); + // TODO how should we format a malformed here? 
}, else => { std.debug.panic("TODO: Handle formatting {s}", .{@tagName(header)}); diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt new file mode 100644 index 0000000000..6541b278a0 --- /dev/null +++ b/src/snapshots/fuzz_crash_001.txt @@ -0,0 +1,7 @@ +~~~META +description=crashes +~~~SOURCE +mo|% +~~~PARSE_ERRORS +PARSE ERROR pattern_unexpected_token +PARSE ERROR unexpected_token From 35cd1e8dd94616ac96de0d4a489dca18f0c4f433 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 09:09:47 +1100 Subject: [PATCH 06/40] fix fuzz crash ty_anno_unexpected_token, handle EOF more gracefully --- crates/compiler/builtins/bitcode/build.zig | 6 +-- crates/compiler/builtins/bitcode/src/list.zig | 6 +-- src/check/parse/IR.zig | 7 ++++ src/check/parse/Parser.zig | 40 ++++++++++++------- src/fmt.zig | 3 ++ src/snapshots/fuzz_crash_001.txt | 2 +- src/snapshots/fuzz_crash_002.txt | 23 +++++++++++ 7 files changed, 65 insertions(+), 22 deletions(-) create mode 100644 src/snapshots/fuzz_crash_002.txt diff --git a/crates/compiler/builtins/bitcode/build.zig b/crates/compiler/builtins/bitcode/build.zig index fb76a5de61..7c51dbb680 100644 --- a/crates/compiler/builtins/bitcode/build.zig +++ b/crates/compiler/builtins/bitcode/build.zig @@ -119,9 +119,9 @@ fn generateObjectFile( const suffix = if (target.result.os.tag == std.Target.Os.Tag.windows) - "obj" - else - "o"; + "obj" + else + "o"; const install = b.addInstallFile(obj_file, b.fmt("{s}.{s}", .{ object_name, suffix })); const obj_step = b.step(step_name, "Build object file for linking"); diff --git a/crates/compiler/builtins/bitcode/src/list.zig b/crates/compiler/builtins/bitcode/src/list.zig index b461dd74ce..bc1b219b94 100644 --- a/crates/compiler/builtins/bitcode/src/list.zig +++ b/crates/compiler/builtins/bitcode/src/list.zig @@ -517,9 +517,9 @@ pub fn listSwap( const source_ptr = @as([*]u8, @ptrCast(newList.bytes)); swapElements(source_ptr, element_width, @as(usize, - // We already verified that both indices are less than the stored list length, - // which is usize, so casting them to usize will definitely be lossless. - @intCast(index_1)), @as(usize, @intCast(index_2)), copy); + // We already verified that both indices are less than the stored list length, + // which is usize, so casting them to usize will definitely be lossless. + @intCast(index_1)), @as(usize, @intCast(index_2)), copy); return newList; } diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index af983869ff..7868a999b3 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -54,6 +54,7 @@ pub const Diagnostic = struct { header_unexpected_token, header_expected_close_bracket, pattern_unexpected_token, + ty_anno_unexpected_token, }; // TODO this is a hack just to get something in the snapshots... 
@@ -1036,6 +1037,9 @@ pub const NodeStore = struct { node.tag = .ty_parens; node.data.lhs = p.anno.id; }, + .malformed => { + node.tag = .malformed; + }, } const nid = store.nodes.append(store.gpa, node); @@ -1806,6 +1810,9 @@ pub const NodeStore = struct { anno: TypeAnnoIdx, region: Region, }, + malformed: struct { + reason: Diagnostic.Tag, + }, const TagUnionRhs = packed struct { open: u1, tags_len: u31 }; }; diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 6f51fd140e..09053d2ec1 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -117,7 +117,9 @@ pub fn pushDiagnostic(self: *Parser, tag: IR.Diagnostic.Tag, region: IR.Region) /// add a malformed token pub fn pushMalformed(self: *Parser, comptime t: type, tag: IR.Diagnostic.Tag) t { const pos = self.pos; - self.advanceOne(); // TODO: find a better point to advance to + if (self.peek() != .EndOfFile) { + self.advanceOne(); // TODO: find a better point to advance to + } self.diagnostics.append(self.gpa, .{ .tag = tag, .region = .{ .start = pos, .end = pos }, @@ -167,7 +169,7 @@ pub fn parseFile(self: *Parser) void { fn parseCollection(self: *Parser, comptime T: type, end_token: Token.Tag, scratch: *std.ArrayListUnmanaged(T), parser: fn (*Parser) T) ExpectError!usize { const scratch_top = scratch.items.len; - while (self.peek() != end_token) { + while (self.peek() != end_token and self.peek() != .EndOfFile) { scratch.append(self.gpa, parser(self)) catch |err| exitOnOom(err); self.expect(.Comma) catch { break; @@ -181,7 +183,7 @@ fn parseCollection(self: *Parser, comptime T: type, end_token: Token.Tag, scratc /// Parses the items of type T until we encounter end_token, with each item separated by a Comma token fn parseCollectionSpan(self: *Parser, comptime T: type, end_token: Token.Tag, scratch_fn: fn (*IR.NodeStore, T) void, parser: fn (*Parser) T) ExpectError!void { - while (self.peek() != end_token) { + while (self.peek() != end_token and self.peek() != .EndOfFile) { scratch_fn(&self.store, parser(self)); self.expect(.Comma) catch { break; @@ -232,7 +234,7 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { } self.advance(); const scratch_top = self.store.scratchTokenTop(); - while (self.peek() != .CloseSquare) { + while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { if (self.peek() != .LowerIdent and self.peek() != .UpperIdent) { // std.debug.panic("TODO: Handler header bad exposes contents: {s}", .{@tagName(self.peek())}); const reason: IR.Diagnostic.Tag = .header_unexpected_token; @@ -286,7 +288,7 @@ pub fn parseAppHeader(self: *Parser) IR.NodeStore.HeaderIdx { } self.advance(); const scratch_top = self.store.scratchTokenTop(); - while (self.peek() != .CloseSquare) { + while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { if (self.peek() != .LowerIdent and self.peek() != .UpperIdent) { self.store.clearScratchTokensFrom(scratch_top); return self.pushMalformed(IR.NodeStore.HeaderIdx, .expected_provides); @@ -313,7 +315,7 @@ pub fn parseAppHeader(self: *Parser) IR.NodeStore.HeaderIdx { return self.pushMalformed(IR.NodeStore.HeaderIdx, .expected_package_platform_open_curly); } self.advance(); - while (self.peek() != .CloseCurly) { + while (self.peek() != .CloseCurly and self.peek() != .EndOfFile) { const entry_start = self.pos; if (self.peek() != .LowerIdent) { self.store.clearScratchRecordFieldsFrom(fields_scratch_top); @@ -745,7 +747,7 @@ pub fn parsePatternRecordField(self: *Parser, alternatives: Alternatives) IR.Nod }); } if (self.peek() != 
.LowerIdent) { - while (self.peek() != .CloseCurly) { + while (self.peek() != .CloseCurly and self.peek() != .EndOfFile) { self.advance(); } return self.pushMalformed(IR.NodeStore.PatternRecordFieldIdx, .unexpected_token); @@ -754,7 +756,7 @@ pub fn parsePatternRecordField(self: *Parser, alternatives: Alternatives) IR.Nod self.advance(); var value: ?IR.NodeStore.PatternIdx = null; if (self.peek() != .OpColon and (self.peekNext() != .Comma or self.peekNext() != .CloseCurly)) { - while (self.peek() != .CloseCurly) { + while (self.peek() != .CloseCurly and self.peek() != .EndOfFile) { self.advance(); } return self.pushMalformed(IR.NodeStore.PatternRecordFieldIdx, .unexpected_token); @@ -831,7 +833,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { self.advance(); const scratch_top = self.store.scratchExprTop(); self.parseCollectionSpan(IR.NodeStore.ExprIdx, .CloseSquare, IR.NodeStore.addScratchExpr, parseExpr) catch { - while (self.peek() != .CloseSquare) { + while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { self.advance(); } self.store.clearScratchExprsFrom(scratch_top); @@ -848,7 +850,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { // TODO: Parenthesized expressions const scratch_top = self.store.scratchExprTop(); self.parseCollectionSpan(IR.NodeStore.ExprIdx, .CloseRound, IR.NodeStore.addScratchExpr, parseExpr) catch { - while (self.peek() != .CloseRound) { + while (self.peek() != .CloseRound and self.peek() != .EndOfFile) { self.advance(); } self.store.clearScratchExprsFrom(scratch_top); @@ -935,7 +937,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { return self.pushMalformed(IR.NodeStore.ExprIdx, .unexpected_token); }; const scratch_top = self.store.scratchWhenBranchTop(); - while (self.peek() != .CloseCurly) { + while (self.peek() != .CloseCurly and self.peek() != .EndOfFile) { self.store.addScratchWhenBranch(self.parseBranch()); if (self.peek() == .Comma) { self.advance(); @@ -1219,7 +1221,7 @@ pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.Typ self.advance(); // Advance past OpenRound const after_round = self.pos; const scratch_top = self.store.scratchTypeAnnoTop(); - while (self.peek() != .CloseRound and self.peek() != .OpArrow and self.peek() != .OpFatArrow) { + while (self.peek() != .CloseRound and self.peek() != .OpArrow and self.peek() != .OpFatArrow and self.peek() != .EndOfFile) { // Looking for args here so that we don't capture an un-parenthesized fn's args self.store.addScratchTypeAnno(self.parseTypeAnno(.looking_for_args)); if (self.peek() != .Comma) { @@ -1293,7 +1295,15 @@ pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.Typ self.advance(); // Advance past Underscore }, else => { - std.debug.panic("Could not parse type annotation, got {s}@{d}", .{ @tagName(self.peek()), self.pos }); + // std.debug.panic("Could not parse type annotation, got {s}@{d}", .{ @tagName(self.peek()), self.pos }); + const reason: IR.Diagnostic.Tag = .ty_anno_unexpected_token; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + // anno = self.store.addTypeAnno(.{ .malformed = .{ .reason = reason } }); + // self.advance(); + return self.store.addTypeAnno(.{ .malformed = .{ .reason = reason } }); }, } @@ -1333,7 +1343,7 @@ pub fn parseTypeAnnoInCollection(self: *Parser) IR.NodeStore.TypeAnnoIdx { pub fn parseAnnoRecordField(self: *Parser) IR.NodeStore.AnnoRecordFieldIdx { const field_start = self.pos; if 
(self.peek() != .LowerIdent) { - while (self.peek() != .CloseCurly and self.peek() != .Comma) { + while (self.peek() != .CloseCurly and self.peek() != .Comma and self.peek() != .EndOfFile) { self.advance(); // Advance until we end this field or the record } return self.pushMalformed(IR.NodeStore.AnnoRecordFieldIdx, .unexpected_token); @@ -1341,7 +1351,7 @@ pub fn parseAnnoRecordField(self: *Parser) IR.NodeStore.AnnoRecordFieldIdx { const name = self.pos; self.advance(); // Advance past LowerIdent if (self.peek() != .OpColon) { - while (self.peek() != .CloseCurly and self.peek() != .Comma) { + while (self.peek() != .CloseCurly and self.peek() != .Comma and self.peek() != .EndOfFile) { self.advance(); // Advance until we end this field or the record } return self.pushMalformed(IR.NodeStore.AnnoRecordFieldIdx, .unexpected_token); diff --git a/src/fmt.zig b/src/fmt.zig index 2050127632..27f8c38702 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -569,6 +569,9 @@ fn formatTypeAnno(fmt: *Formatter, anno: IR.NodeStore.TypeAnnoIdx) void { .underscore => |_| { fmt.push('_'); }, + .malformed => { + // TODO how should we format a malformed here? + }, } } diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 6541b278a0..0d8262f0b3 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -1,5 +1,5 @@ ~~~META -description=crashes +description=fuzz crash ~~~SOURCE mo|% ~~~PARSE_ERRORS diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt new file mode 100644 index 0000000000..f443135e0f --- /dev/null +++ b/src/snapshots/fuzz_crash_002.txt @@ -0,0 +1,23 @@ +~~~META +description=fuzz crash +~~~SOURCE +modu:;::::::::::::::le[% +~~~PARSE_ERRORS +PARSE ERROR ty_anno_unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token From eeae3cd5bca8e460f06f15a6e5bbe6cd9571196e Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 09:30:57 +1100 Subject: [PATCH 07/40] fix crash in the middle of string parsing --- src/check/parse/IR.zig | 21 +++++++++++++++++++-- src/check/parse/Parser.zig | 17 +++++++++-------- src/fmt.zig | 4 ++++ src/snapshots/fuzz_crash_003.txt | 6 ++++++ 4 files changed, 38 insertions(+), 10 deletions(-) create mode 100644 src/snapshots/fuzz_crash_003.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 7868a999b3..36731c2826 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -55,6 +55,8 @@ pub const Diagnostic = struct { header_expected_close_bracket, pattern_unexpected_token, ty_anno_unexpected_token, + statement_unexpected_eof, + string_unexpected_token, }; // TODO this is a hack just to get something in the snapshots... 
@@ -688,6 +690,11 @@ pub const NodeStore = struct { node.data.lhs = a.name; node.data.rhs = a.anno.id; }, + .malformed => |m| { + node.tag = .malformed; + node.data.lhs = @intFromEnum(m.reason); + node.data.rhs = 0; + }, } const nid = store.nodes.append(store.gpa, node); return .{ .id = @intFromEnum(nid) }; @@ -752,8 +759,10 @@ pub const NodeStore = struct { node.data.lhs = a.patterns.span.start; node.data.rhs = a.patterns.span.len; }, - .malformed => { + .malformed => |a| { node.tag = .malformed; + node.data.lhs = @intFromEnum(a.reason); + node.data.rhs = 0; }, } const nid = store.nodes.append(store.gpa, node); @@ -1037,8 +1046,10 @@ pub const NodeStore = struct { node.tag = .ty_parens; node.data.lhs = p.anno.id; }, - .malformed => { + .malformed => |a| { node.tag = .malformed; + node.data.lhs = @intFromEnum(a.reason); + node.data.rhs = 0; }, } @@ -1703,6 +1714,9 @@ pub const NodeStore = struct { anno: TypeAnnoIdx, region: Region, }, + malformed: struct { + reason: Diagnostic.Tag, + }, pub const Import = struct { module_name_tok: TokenIdx, @@ -1970,6 +1984,9 @@ pub const NodeStore = struct { region: Region, }, block: Body, + malformed: struct { + reason: Diagnostic.Tag, + }, pub fn as_string_part_region(self: @This()) !Region { switch (self) { diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 09053d2ec1..9261fe8f02 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -144,9 +144,6 @@ pub fn parseFile(self: *Parser) void { const scratch_top = self.store.scratchStatementTop(); while (self.peek() != .EndOfFile) { - if (self.peek() == .EndOfFile) { - break; - } const current_scratch_top = self.store.scratchStatementTop(); if (self.parseStmt()) |idx| { std.debug.assert(self.store.scratchStatementTop() == current_scratch_top); @@ -1092,7 +1089,7 @@ pub fn parseStringExpr(self: *Parser) IR.NodeStore.ExprIdx { // StringStart, StringPart, OpenStringInterpolation, , CloseStringInterpolation, StringPart, StringEnd self.advanceOne(); const scratch_top = self.store.scratchExprTop(); - while (true) { + while (self.peek() != .EndOfFile) { switch (self.peek()) { .StringEnd => { self.advanceOne(); @@ -1117,8 +1114,14 @@ pub fn parseStringExpr(self: *Parser) IR.NodeStore.ExprIdx { }, else => { // Something is broken in the tokenizer if we get here! - std.debug.print("Unexpected token in string: {s}\n", .{@tagName(self.peek())}); - unreachable; + // std.debug.print("Unexpected token in string: {s}\n", .{@tagName(self.peek())}); + // unreachable; + const reason: IR.Diagnostic.Tag = .string_unexpected_token; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); }, } } @@ -1301,8 +1304,6 @@ pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.Typ .start = self.pos, .end = self.pos, }); - // anno = self.store.addTypeAnno(.{ .malformed = .{ .reason = reason } }); - // self.advance(); return self.store.addTypeAnno(.{ .malformed = .{ .reason = reason } }); }, } diff --git a/src/fmt.zig b/src/fmt.zig index 27f8c38702..eea266562a 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -106,6 +106,10 @@ fn formatStatement(fmt: *Formatter, si: StatementIdx) NewlineBehavior { fmt.formatExpr(r.expr); return .extra_newline_needed; }, + .malformed => { + // TODO how should we format a malformed here? 
+ return .no_extra_newline; + }, } } diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt new file mode 100644 index 0000000000..238cede03a --- /dev/null +++ b/src/snapshots/fuzz_crash_003.txt @@ -0,0 +1,6 @@ +~~~META +description=fuzz crash +~~~SOURCE + = "te +~~~PARSE_ERRORS +PARSE ERROR unexpected_token From 62d208dc602008683c495640c97ba6bddcffd7f3 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 09:32:34 +1100 Subject: [PATCH 08/40] fix crash in the middle of string parsing --- src/check/parse/IR.zig | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 36731c2826..1424fe9a10 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -889,6 +889,11 @@ pub const NodeStore = struct { .ellipsis => |_| { node.tag = .ellipsis; }, + .malformed => |m| { + node.tag = .malformed; + node.data.lhs = @intFromEnum(m.reason); + node.data.rhs = 0; + }, } const nid = store.nodes.append(store.gpa, node); return .{ .id = @intFromEnum(nid) }; From f6b94b74bfd61ecaa8ba1bfe74e541298d2fb8b0 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 09:36:34 +1100 Subject: [PATCH 09/40] fix crash for file with just a single uppercase character --- src/check/parse/IR.zig | 10 ++++++++++ src/snapshots/fuzz_crash_004.txt | 11 +++++++++++ 2 files changed, 21 insertions(+) create mode 100644 src/snapshots/fuzz_crash_004.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 1424fe9a10..73a4f6c946 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -2033,6 +2033,16 @@ pub const NodeStore = struct { } return node; }, + .tag => |a| { + var node = sexpr.Expr.init(env.gpa, "tag"); + node.appendStringChild(env.gpa, ir.resolve(a.token)); + return node; + }, + .malformed => |a| { + var node = sexpr.Expr.init(env.gpa, "malformed"); + node.appendStringChild(env.gpa, @tagName(a.reason)); + return node; + }, else => { std.debug.print("Format for Expr {}", .{self}); @panic("not implemented yet"); diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt new file mode 100644 index 0000000000..53cc6b1a43 --- /dev/null +++ b/src/snapshots/fuzz_crash_004.txt @@ -0,0 +1,11 @@ +~~~META +description=fuzz crash +~~~SOURCE +F +~~~FORMATTED + +F +~~~PARSE +(file + (malformed 'missing_header') + (tag 'F')) \ No newline at end of file From 8c7527773870a493e1299649e6313a1bad309674 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 09:47:43 +1100 Subject: [PATCH 10/40] fix crash and memory leak in tokenize reporting --- src/check/parse.zig | 2 +- src/check/parse/IR.zig | 1 + src/check/parse/Parser.zig | 8 +++++++- src/snapshots/fuzz_crash_005.txt | 11 +++++++++++ 4 files changed, 20 insertions(+), 2 deletions(-) create mode 100644 src/snapshots/fuzz_crash_005.txt diff --git a/src/check/parse.zig b/src/check/parse.zig index 2703e2f3de..250b14c90a 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -82,7 +82,7 @@ fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []cons std.debug.print( "({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", - .{ start_line_num, start_col, end_line_num, end_col, src, spaces.toOwnedSlice() catch |err| exitOnOom(err) }, + .{ start_line_num, start_col, end_line_num, end_col, src, spaces.items }, ); }, else => { diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 73a4f6c946..b9f96ba818 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -57,6 
+57,7 @@ pub const Diagnostic = struct { ty_anno_unexpected_token, statement_unexpected_eof, string_unexpected_token, + expr_if_missing_else, }; // TODO this is a hack just to get something in the snapshots... diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 9261fe8f02..341033d7a1 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -915,7 +915,13 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { const condition = self.parseExpr(); const then = self.parseExpr(); if (self.peek() != .KwElse) { - std.debug.panic("TODO: problem for no else {s}@{d}", .{ @tagName(self.peek()), self.pos }); + // std.debug.panic("TODO: problem for no else {s}@{d}", .{ @tagName(self.peek()), self.pos }); + const reason: IR.Diagnostic.Tag = .expr_if_missing_else; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); } self.advance(); const else_idx = self.parseExpr(); diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt new file mode 100644 index 0000000000..9f1c67130e --- /dev/null +++ b/src/snapshots/fuzz_crash_005.txt @@ -0,0 +1,11 @@ +~~~META +description=fuzz crash +~~~SOURCE +modu +~~~FORMATTED + +modu +~~~PARSE +(file + (malformed 'missing_header') + (ident '' 'modu')) \ No newline at end of file From 0ef4b16081059c5691924609cc027458c8f77b42 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 10:08:32 +1100 Subject: [PATCH 11/40] dont add a newline if we have a malformed header --- src/fmt.zig | 31 +++++++++++++++++++++++++------ src/snapshots/fuzz_crash_004.txt | 1 - src/snapshots/fuzz_crash_005.txt | 1 - 3 files changed, 25 insertions(+), 8 deletions(-) diff --git a/src/fmt.zig b/src/fmt.zig index eea266562a..76612dbe0c 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -45,15 +45,28 @@ pub fn resetWith(fmt: *Formatter, ast: IR) void { /// Emits a string containing the well-formed source of a Roc parse IR (AST). /// The resulting string is owned by the caller. pub fn formatFile(fmt: *Formatter) []const u8 { + var ignore_newline_for_first_statement = false; + fmt.ast.store.emptyScratch(); const file = fmt.ast.store.getFile(); - fmt.formatHeader(file.header); + const maybe_output = fmt.formatHeader(file.header); + if (maybe_output == FormattedOutput.nothing_formatted) { + ignore_newline_for_first_statement = true; + } var newline_behavior: NewlineBehavior = .extra_newline_needed; for (fmt.ast.store.statementSlice(file.statements)) |s| { - fmt.ensureNewline(); - if (newline_behavior == .extra_newline_needed) { - fmt.newline(); + + // If there was nothing formatted because the header was malformed, + // then we don't want to add a newline + if (ignore_newline_for_first_statement) { + ignore_newline_for_first_statement = false; + } else { + fmt.ensureNewline(); + if (newline_behavior == .extra_newline_needed) { + fmt.newline(); + } } + newline_behavior = fmt.formatStatement(s); } return fmt.buffer.toOwnedSlice(fmt.gpa) catch |err| exitOnOom(err); @@ -392,7 +405,10 @@ fn formatPattern(fmt: *Formatter, pi: PatternIdx) void { } } -fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { +/// The caller may need to know if anything was formatted, to handle newlines correctly. 
+const FormattedOutput = enum { something_formatted, nothing_formatted }; + +fn formatHeader(fmt: *Formatter, hi: HeaderIdx) FormattedOutput { const header = fmt.ast.store.getHeader(hi); switch (header) { .app => |a| { @@ -428,6 +444,7 @@ fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { } fmt.pushAll(" }"); fmt.newline(); + return FormattedOutput.something_formatted; }, .module => |m| { fmt.pushAll("module ["); @@ -441,9 +458,11 @@ fn formatHeader(fmt: *Formatter, hi: HeaderIdx) void { } fmt.push(']'); fmt.newline(); + return FormattedOutput.something_formatted; }, .malformed => { - // TODO how should we format a malformed here? + // we have a malformed header... don't output anything as no header was parsed + return FormattedOutput.nothing_formatted; }, else => { std.debug.panic("TODO: Handle formatting {s}", .{@tagName(header)}); diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index 53cc6b1a43..00b7dd30f5 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -3,7 +3,6 @@ description=fuzz crash ~~~SOURCE F ~~~FORMATTED - F ~~~PARSE (file diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index 9f1c67130e..1fb9683644 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -3,7 +3,6 @@ description=fuzz crash ~~~SOURCE modu ~~~FORMATTED - modu ~~~PARSE (file From 3299a3e1a4e7a8e893001a9ecd82a5a20221585b Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 10:17:05 +1100 Subject: [PATCH 12/40] fix out of bounds in parse.lineNum() and tokenize reporting --- src/check/parse.zig | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/src/check/parse.zig b/src/check/parse.zig index 250b14c90a..668c521efe 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -43,13 +43,20 @@ pub fn parse(env: *base.ModuleEnv, source: []const u8) IR { fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { const pos_usize = @as(usize, @intCast(pos)); + + if (newlines.items.len == 0) { + return 0; + } + var lineno: u32 = 0; - while (lineno < newlines.items.len) { + + while (lineno + 1 < newlines.items.len) { if (newlines.items[lineno + 1] > pos_usize) { return lineno; } lineno += 1; } + return lineno; } @@ -73,7 +80,12 @@ fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []cons const end_line_num = lineNum(newlines, message.end); const end_col = message.end - newlines.items[end_line_num]; - const src = source[newlines.items[start_line_num]..newlines.items[end_line_num + 1]]; + const end_index = if (end_line_num + 1 < newlines.items.len) + end_line_num + 1 + else + end_line_num; + + const src = source[newlines.items[start_line_num]..newlines.items[end_index]]; var spaces = std.ArrayList(u8).init(allocator); defer spaces.deinit(); for (0..start_col) |_| { From 383ad560a18e6c0ee042cfef6bdc9df45130586b Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 10:47:41 +1100 Subject: [PATCH 13/40] fix crash for malformed lambda --- src/check/parse/IR.zig | 100 ++++++++++++++++++++++++++++++- src/snapshots/fuzz_crash_006.txt | Bin 0 -> 185 bytes 2 files changed, 99 insertions(+), 1 deletion(-) create mode 100644 src/snapshots/fuzz_crash_006.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index b9f96ba818..590a4f5162 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -2026,6 +2026,7 @@ pub const NodeStore = struct { ident_sexpr.appendStringChild(env.gpa, 
ir.resolve(ident.token)); return ident_sexpr; }, + // (list []) .list => |a| { var node = sexpr.Expr.init(env.gpa, "list"); for (ir.store.exprSlice(a.items)) |b| { @@ -2034,18 +2035,102 @@ pub const NodeStore = struct { } return node; }, + // (tag ) .tag => |a| { var node = sexpr.Expr.init(env.gpa, "tag"); node.appendStringChild(env.gpa, ir.resolve(a.token)); return node; }, + // (malformed ) .malformed => |a| { var node = sexpr.Expr.init(env.gpa, "malformed"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, + // (int ) + .int => |a| { + var node = sexpr.Expr.init(env.gpa, "int"); + node.appendStringChild(env.gpa, ir.resolve(a.token)); + return node; + }, + // (float ) + .float => |a| { + var node = sexpr.Expr.init(env.gpa, "float"); + node.appendStringChild(env.gpa, ir.resolve(a.token)); + return node; + }, + // (tuple []) + .tuple => |a| { + var node = sexpr.Expr.init(env.gpa, "tuple"); + + for (ir.store.exprSlice(a.items)) |item| { + var child = ir.store.getExpr(item).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &child); + } + + return node; + }, + // (record [(field ?optional)]) + .record => |a| { + var node = sexpr.Expr.init(env.gpa, "record"); + + for (ir.store.recordFieldSlice(a.fields)) |field_idx| { + const record_field = ir.store.getRecordField(field_idx); + var record_field_node = sexpr.Expr.init(env.gpa, "field"); + record_field_node.appendStringChild(env.gpa, ir.resolve(record_field.name)); + if (record_field.value != null) { + var value_node = ir.store.getExpr(record_field.value.?).toSExpr(env, ir); + record_field_node.appendNodeChild(env.gpa, &value_node); + } + if (record_field.optional) { + record_field_node.appendStringChild(env.gpa, "optional"); + } + node.appendNodeChild(env.gpa, &record_field_node); + } + + return node; + }, + // (apply []) + .apply => |a| { + var node = sexpr.Expr.init(env.gpa, "apply"); + var apply_fn = ir.store.getExpr(a.@"fn").toSExpr(env, ir); + node.appendNodeChild(env.gpa, &apply_fn); + + for (ir.store.exprSlice(a.args)) |arg| { + var arg_node = ir.store.getExpr(arg).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &arg_node); + } + + return node; + }, + .field_access => |a| { + var node = sexpr.Expr.init(env.gpa, "field_access"); + var child = a.toSExpr(env, ir); + node.appendNodeChild(env.gpa, &child); + return node; + }, + .bin_op => |a| { + return a.toSExpr(env, ir); + }, + .lambda => |a| { + var node = sexpr.Expr.init(env.gpa, "lambda"); + + // arguments + var args = sexpr.Expr.init(env.gpa, "args"); + for (ir.store.patternSlice(a.args)) |arg| { + var arg_node = ir.store.getPattern(arg).toSExpr(env, ir); + args.appendNodeChild(env.gpa, &arg_node); + } + node.appendNodeChild(env.gpa, &args); + + // body + var body = ir.store.getExpr(a.body).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &body); + + return node; + }, else => { - std.debug.print("Format for Expr {}", .{self}); + std.debug.print("\n\n toSExpr not implement for Expr {}\n\n", .{self}); @panic("not implemented yet"); }, } @@ -2081,6 +2166,19 @@ pub const NodeStore = struct { right: ExprIdx, operator: TokenIdx, region: Region, + + /// (binop ) e.g. 
(binop '+' 1 2) + pub fn toSExpr(self: *const @This(), env: *base.ModuleEnv, ir: *IR) sexpr.Expr { + var node = sexpr.Expr.init(env.gpa, "binop"); + node.appendStringChild(env.gpa, ir.resolve(self.operator)); + + var left = ir.store.getExpr(self.left).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &left); + + var right = ir.store.getExpr(self.right).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &right); + return node; + } }; pub const Unary = struct { operator: TokenIdx, diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt new file mode 100644 index 0000000000000000000000000000000000000000..0c53bc939360cd8b03f741a5d3ed0d5499ec3911 GIT binary patch literal 185 zcmXwwu?~VT6h)oi;xD}I(T{K-TEjwW0B2KafhH78iwvacx2q-I?c95AjIrcR4odsh znb%_G>D0Zq4O=Ju05Qf|+^aK(Lm|R3tiR?esxogHet|*&gn+5N&&PDzsal| xyV+T-kgW0EO#NffQfWtQ7Z%wg72=Phv`;0|l>XB7em4^urKbgnkXB-heE?d4HA(;g literal 0 HcmV?d00001 From 138975a615774cf55c5c8057965852a9b768461b Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 10:49:59 +1100 Subject: [PATCH 14/40] fix fuzz crash for expr_no_space_dot_int --- src/check/parse/IR.zig | 1 + src/check/parse/Parser.zig | 8 +++++++- src/snapshots/fuzz_crash_007.txt | 8 ++++++++ 3 files changed, 16 insertions(+), 1 deletion(-) create mode 100644 src/snapshots/fuzz_crash_007.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 590a4f5162..a55227d38d 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -58,6 +58,7 @@ pub const Diagnostic = struct { statement_unexpected_eof, string_unexpected_token, expr_if_missing_else, + expr_no_space_dot_int, }; // TODO this is a hack just to get something in the snapshots... diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 341033d7a1..3a50ffe287 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -980,7 +980,13 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { while (self.peek() == .NoSpaceDotInt or self.peek() == .NoSpaceDotLowerIdent) { const tok = self.peek(); if (tok == .NoSpaceDotInt) { // NoSpaceDotInt - std.debug.panic("TODO: Handle NoSpaceDotInt case", .{}); + // std.debug.panic("TODO: Handle NoSpaceDotInt case", .{}); + const reason: IR.Diagnostic.Tag = .expr_no_space_dot_int; + self.pushDiagnostic(reason, .{ + .start = self.pos, + .end = self.pos, + }); + return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); } else { // NoSpaceDotLowerIdent const s = self.pos; const ident = self.store.addExpr(.{ .ident = .{ diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt new file mode 100644 index 0000000000..e38b765255 --- /dev/null +++ b/src/snapshots/fuzz_crash_007.txt @@ -0,0 +1,8 @@ +~~~META +description=fuzz crash +~~~SOURCE +ff8.8.d +~~~PARSE_ERRORS +PARSE ERROR expr_no_space_dot_int +PARSE ERROR unexpected_token +PARSE ERROR unexpected_token From 6121f2a6df70b6bac54c79fc7d08735597cc4f6c Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 11:03:28 +1100 Subject: [PATCH 15/40] only generate FORMATTED section if expected --- src/snapshot.zig | 13 ++++--------- src/snapshots/fuzz_crash_004.txt | 2 -- src/snapshots/fuzz_crash_005.txt | 2 -- src/snapshots/fuzz_crash_008.txt | 12 ++++++++++++ 4 files changed, 16 insertions(+), 13 deletions(-) create mode 100644 src/snapshots/fuzz_crash_008.txt diff --git a/src/snapshot.zig b/src/snapshot.zig index 28e820c49a..7c33429188 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -292,8 +292,6 @@ fn 
processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor } }; - // std.debug.print("FILE: {s}\n{}\n", .{ snapshot_path, content }); - // Generate the PARSE section var parse_buffer = std.ArrayList(u8).init(gpa); defer parse_buffer.deinit(); @@ -309,20 +307,17 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor parse_ast.store.emptyScratch(); const has_parse_errors = parse_ast.errors.len > 0; - // if (parse_ast.errors.len > 0) { - // warn("skipping file {s} as it contains {d} errors", .{ snapshot_path, parse_ast.errors.len }); - // return false; - // } + // Write the new AST to the parse section if (!has_parse_errors) { - // Write the new AST to the parse section try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); + } - // Format the source code + // Format the source code + if (!has_parse_errors and content.formatted != null) { var formatter = fmt.init(parse_ast); defer formatter.deinit(); content.formatted = formatter.formatFile(); - // defer gpa.free(formatted_output); } // Rewrite the file with updated sections diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index 00b7dd30f5..069d68a1ef 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -2,8 +2,6 @@ description=fuzz crash ~~~SOURCE F -~~~FORMATTED -F ~~~PARSE (file (malformed 'missing_header') diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index 1fb9683644..2fa050373d 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -2,8 +2,6 @@ description=fuzz crash ~~~SOURCE modu -~~~FORMATTED -modu ~~~PARSE (file (malformed 'missing_header') diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt new file mode 100644 index 0000000000..de6d398c80 --- /dev/null +++ b/src/snapshots/fuzz_crash_008.txt @@ -0,0 +1,12 @@ +~~~META +description=fuzz crash +~~~SOURCE +||1 +~~~FORMATTED +|| 1 +~~~PARSE +(file + (malformed 'missing_header') + (lambda + (args) + (int '1'))) \ No newline at end of file From cc4a451d3b02c9406be54191daedd0e5c815dd6a Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 11:05:09 +1100 Subject: [PATCH 16/40] simplify snapshot logic for writing to file --- src/snapshot.zig | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) diff --git a/src/snapshot.zig b/src/snapshot.zig index 7c33429188..7902f34de0 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -332,25 +332,17 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try file.writer().writeAll(content.meta); try file.writer().writeAll("\n"); - // If there's an explicit FORMATTED section, keep the source as-is - // and update the FORMATTED section + try file.writer().writeAll(Section.SOURCE); + try file.writer().writeAll("\n"); + try file.writer().writeAll(content.source); + try file.writer().writeAll("\n"); + if (content.formatted != null) { - try file.writer().writeAll(Section.SOURCE); - try file.writer().writeAll("\n"); - try file.writer().writeAll(content.source); - try file.writer().writeAll("\n"); try file.writer().writeAll(Section.FORMATTED); try file.writer().writeAll("\n"); try file.writer().writeAll(content.formatted.?); try file.writer().writeAll("\n"); - gpa.free(content.formatted.?); - } else { - // Otherwise, update SOURCE directly with the formatted output - try file.writer().writeAll(Section.SOURCE); - try file.writer().writeAll("\n"); - try 
file.writer().writeAll(content.source); - try file.writer().writeAll("\n"); } if (!has_parse_errors) { From ea83e5bde094cb349f2f4d1b464c1c179dada4f9 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 11:38:39 +1100 Subject: [PATCH 17/40] add TOKENS section to snapshots --- src/check/parse/Parser.zig | 1 + src/fmt.zig | 4 ++- src/snapshot.zig | 29 +++++++++++++++++++++- src/snapshots/001.txt | 1 + src/snapshots/003.txt | 1 + src/snapshots/fuzz_crash_001.txt | 2 ++ src/snapshots/fuzz_crash_002.txt | 2 ++ src/snapshots/fuzz_crash_003.txt | 2 ++ src/snapshots/fuzz_crash_004.txt | 2 ++ src/snapshots/fuzz_crash_005.txt | 2 ++ src/snapshots/fuzz_crash_006.txt | Bin 185 -> 239 bytes src/snapshots/fuzz_crash_007.txt | 2 ++ src/snapshots/fuzz_crash_008.txt | 12 +++------ src/snapshots/header_unexpected_token.txt | 2 ++ src/snapshots/some_folder/002.txt | 1 + 15 files changed, 53 insertions(+), 10 deletions(-) diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 3a50ffe287..0efaed211d 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -903,6 +903,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { return self.pushMalformed(IR.NodeStore.ExprIdx, .unexpected_token); }; const args = self.store.patternSpanFrom(scratch_top); + const body = self.parseExpr(); expr = self.store.addExpr(.{ .lambda = .{ .body = body, diff --git a/src/fmt.zig b/src/fmt.zig index 76612dbe0c..c650e84346 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -228,7 +228,9 @@ fn formatExpr(fmt: *Formatter, ei: ExprIdx) void { .lambda => |l| { fmt.push('|'); var i: usize = 0; - for (fmt.ast.store.patternSlice(l.args)) |arg| { + const arg_slice = fmt.ast.store.patternSlice(l.args); + + for (arg_slice) |arg| { fmt.formatPattern(arg); if (i < (l.args.span.len - 1)) { fmt.pushAll(", "); diff --git a/src/snapshot.zig b/src/snapshot.zig index 7902f34de0..f2c0384f36 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -150,18 +150,21 @@ const Section = union(enum) { formatted, parse, parser_errors, + tokens, pub const META = "~~~META"; pub const SOURCE = "~~~SOURCE"; pub const FORMATTED = "~~~FORMATTED"; pub const PARSE = "~~~PARSE"; pub const PARSE_ERRORS = "~~~PARSE_ERRORS"; + pub const TOKENS = "~~~TOKENS"; fn next(self: Section) ?Section { return switch (self) { .meta => .source, .source => .formatted, - .formatted => .parse, + .formatted => .tokens, + .tokens => .parse, .parse => .parser_errors, .parser_errors => null, }; @@ -173,6 +176,7 @@ const Section = union(enum) { if (std.mem.eql(u8, str, FORMATTED)) return .formatted; if (std.mem.eql(u8, str, PARSE)) return .parse; if (std.mem.eql(u8, str, PARSE_ERRORS)) return .parser_errors; + if (std.mem.eql(u8, str, TOKENS)) return .tokens; return null; } @@ -183,6 +187,7 @@ const Section = union(enum) { .formatted => FORMATTED, .parse => PARSE, .parser_errors => PARSE_ERRORS, + .tokens => TOKENS, .None => "", }; } @@ -345,6 +350,28 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor gpa.free(content.formatted.?); } + // Check if tokens should be included + const exclude_tokens = std.mem.indexOf(u8, content.meta, "exclude_tokens=true") != null; + if (!exclude_tokens) { + try file.writer().writeAll(Section.TOKENS); + try file.writer().writeAll("\n"); + const tokenizedBuffer = parse_ast.tokens; + const tokens = tokenizedBuffer.tokens.items(.tag); + var first = true; + for (tokens) |tok| { + + // only write a comma if not the first token + if (first) { + first = false; + } else { 
+ try file.writer().writeAll(","); + } + + try file.writer().writeAll(@tagName(tok)); + } + try file.writer().writeAll("\n"); + } + if (!has_parse_errors) { try file.writer().writeAll(Section.PARSE); try file.writer().writeAll("\n"); diff --git a/src/snapshots/001.txt b/src/snapshots/001.txt index f9329fff63..9cb231d476 100644 --- a/src/snapshots/001.txt +++ b/src/snapshots/001.txt @@ -1,5 +1,6 @@ ~~~META description=Example to develop the snapshot methodology, includes FORMATTED section +exclude_tokens=true ~~~SOURCE module [ # some crazy formatting diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index 8aa72cae8f..f6af8897f5 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -1,5 +1,6 @@ ~~~META description=Import statement +exclude_tokens=true ~~~SOURCE module [decoder] diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 0d8262f0b3..767d8f7ec0 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE mo|% +~~~TOKENS +LowerIdent,OpBar,OpPercent,EndOfFile ~~~PARSE_ERRORS PARSE ERROR pattern_unexpected_token PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index f443135e0f..e094b8d8d6 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE modu:;::::::::::::::le[% +~~~TOKENS +LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile ~~~PARSE_ERRORS PARSE ERROR ty_anno_unexpected_token PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index 238cede03a..d646ff97f6 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -2,5 +2,7 @@ description=fuzz crash ~~~SOURCE = "te +~~~TOKENS +OpAssign,StringStart,StringPart,EndOfFile ~~~PARSE_ERRORS PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index 069d68a1ef..dfd6670eb3 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE F +~~~TOKENS +UpperIdent,EndOfFile ~~~PARSE (file (malformed 'missing_header') diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index 2fa050373d..f3e2ff7a49 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE modu +~~~TOKENS +LowerIdent,EndOfFile ~~~PARSE (file (malformed 'missing_header') diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 0c53bc939360cd8b03f741a5d3ed0d5499ec3911..7f708f86c0b95bf90cd32939acfe7e16fff5a3f0 100644 GIT binary patch delta 60 zcmdnV_?~fsmr;nnx2s<;S7<>&YLRD3YF>$se}PkC5tQ`FFNX=c=B4fB*mh delta 9 QcmaFQxRY^$*TjZ=02HSL1poj5 diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt index e38b765255..d0dd148704 100644 --- a/src/snapshots/fuzz_crash_007.txt +++ b/src/snapshots/fuzz_crash_007.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE ff8.8.d +~~~TOKENS +LowerIdent,NoSpaceDotInt,NoSpaceDotLowerIdent,EndOfFile ~~~PARSE_ERRORS PARSE ERROR expr_no_space_dot_int PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index de6d398c80..cea5bdd003 
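The Section union earlier in this patch encodes the expected snapshot layout through next(). A toy loop that walks that order (the loop itself is not in the patch; it assumes the Section type with the next() and asString() shown above):

    var section: ?Section = .meta;
    while (section) |s| : (section = s.next()) {
        std.debug.print("{s}\n", .{s.asString()});
    }

With the definitions in this patch it prints ~~~META, ~~~SOURCE, ~~~FORMATTED, ~~~TOKENS, ~~~PARSE and ~~~PARSE_ERRORS, matching the order in which the writer emits whichever sections it includes.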
100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -1,12 +1,8 @@ ~~~META description=fuzz crash ~~~SOURCE -||1 -~~~FORMATTED || 1 -~~~PARSE -(file - (malformed 'missing_header') - (lambda - (args) - (int '1'))) \ No newline at end of file +~~~TOKENS +OpOr,Int,EndOfFile +~~~PARSE_ERRORS +PARSE ERROR unexpected_token diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt index 38abdc3e0e..18f86d4fa3 100644 --- a/src/snapshots/header_unexpected_token.txt +++ b/src/snapshots/header_unexpected_token.txt @@ -2,6 +2,8 @@ description= ~~~SOURCE module[% +~~~TOKENS +KwModule,OpenSquare,OpPercent,EndOfFile ~~~PARSE_ERRORS PARSE ERROR header_unexpected_token PARSE ERROR unexpected_token diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 128a258bbd..9178320745 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -1,5 +1,6 @@ ~~~META description=Basic example to develop the snapshot methodology +exclude_tokens=true ~~~SOURCE module [foo, bar] From 0f1752112af480822ad780f866ba347c2d9a0739 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 11:52:30 +1100 Subject: [PATCH 18/40] only add FORMAT section if it is not the same as source --- src/snapshot.zig | 28 ++++++++++++++-------------- src/snapshots/003.txt | 6 ------ src/snapshots/fuzz_crash_008.txt | 12 +++++++++--- src/snapshots/some_folder/002.txt | 6 ------ 4 files changed, 23 insertions(+), 29 deletions(-) diff --git a/src/snapshot.zig b/src/snapshot.zig index f2c0384f36..f991e74570 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -287,7 +287,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor } // Parse the file to find section boundaries - var content = extractSections(gpa, file_content) catch |err| { + const content = extractSections(gpa, file_content) catch |err| { switch (err) { Error.MissingSnapshotHeader, Error.MissingSnapshotSource => { warn("ignoring file {s}: {s}", .{ snapshot_path, @errorName(err) }); @@ -318,13 +318,6 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); } - // Format the source code - if (!has_parse_errors and content.formatted != null) { - var formatter = fmt.init(parse_ast); - defer formatter.deinit(); - content.formatted = formatter.formatFile(); - } - // Rewrite the file with updated sections var file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { log("failed to create file '{s}': {s}", .{ snapshot_path, @errorName(err) }); @@ -342,12 +335,19 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try file.writer().writeAll(content.source); try file.writer().writeAll("\n"); - if (content.formatted != null) { - try file.writer().writeAll(Section.FORMATTED); - try file.writer().writeAll("\n"); - try file.writer().writeAll(content.formatted.?); - try file.writer().writeAll("\n"); - gpa.free(content.formatted.?); + // Format the source code + if (!has_parse_errors) { + var formatter = fmt.init(parse_ast); + defer formatter.deinit(); + const formatted = formatter.formatFile(); + defer gpa.free(formatted); + + if (!std.mem.eql(u8, formatted, content.source)) { + try file.writer().writeAll(Section.FORMATTED); + try file.writer().writeAll("\n"); + try file.writer().writeAll(formatted); + try file.writer().writeAll("\n"); + } } // Check if tokens should be included diff --git 
a/src/snapshots/003.txt b/src/snapshots/003.txt index f6af8897f5..d2e8292ad8 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -6,12 +6,6 @@ module [decoder] import json.Json -decoder = Utf8.decode -~~~FORMATTED -module [decoder] - -import json.Json - decoder = Utf8.decode ~~~PARSE (file diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index cea5bdd003..f9cda0b80a 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -1,8 +1,14 @@ ~~~META description=fuzz crash ~~~SOURCE +||1 +~~~FORMATTED || 1 ~~~TOKENS -OpOr,Int,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR unexpected_token +OpBar,OpBar,Int,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (lambda + (args) + (int '1'))) \ No newline at end of file diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 9178320745..2be28c6d80 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -6,12 +6,6 @@ module [foo, bar] foo = "one" -bar = "two" -~~~FORMATTED -module [foo, bar] - -foo = "one" - bar = "two" ~~~PARSE (file From dbc2daf22aad4ead4a434283a71f97dc7bc0a53a Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 11:58:44 +1100 Subject: [PATCH 19/40] format lambda expressions with no args using a space as a workaround for ambiguity --- src/fmt.zig | 7 +++++++ src/snapshots/fuzz_crash_006.txt | Bin 239 -> 240 bytes src/snapshots/fuzz_crash_008.txt | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/src/fmt.zig b/src/fmt.zig index c650e84346..e56ad80c95 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -230,6 +230,13 @@ fn formatExpr(fmt: *Formatter, ei: ExprIdx) void { var i: usize = 0; const arg_slice = fmt.ast.store.patternSlice(l.args); + // TODO -- this is a hack to avoid ambiguity with no arguments, + // if we parse it again without the space it will be parsed as + // a logical OR `||` instead + if (arg_slice.len == 0) { + fmt.pushAll(" "); + } + for (arg_slice) |arg| { fmt.formatPattern(arg); if (i < (l.args.span.len - 1)) { diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 7f708f86c0b95bf90cd32939acfe7e16fff5a3f0..d01e963885d4d1b1fceba4ac50264e95cf44c5b1 100644 GIT binary patch delta 11 ScmaFQ_ Date: Mon, 10 Mar 2025 14:32:16 +1100 Subject: [PATCH 20/40] unify Problems across stages for snapshot reporting --- src/check/parse.zig | 8 +++++ src/check/parse/IR.zig | 14 -------- src/check/parse/tokenize.zig | 3 ++ src/problem.zig | 29 +++++++++++++++++ src/snapshot.zig | 30 +++++++++-------- src/snapshots/fuzz_crash_001.txt | 6 ++-- src/snapshots/fuzz_crash_002.txt | 38 +++++++++++----------- src/snapshots/fuzz_crash_003.txt | 5 +-- src/snapshots/fuzz_crash_006.txt | Bin 240 -> 350 bytes src/snapshots/fuzz_crash_007.txt | 8 ++--- src/snapshots/fuzz_crash_008.txt | 2 ++ src/snapshots/fuzz_crash_009.txt | 28 ++++++++++++++++ src/snapshots/header_unexpected_token.txt | 6 ++-- 13 files changed, 119 insertions(+), 58 deletions(-) create mode 100644 src/snapshots/fuzz_crash_009.txt diff --git a/src/check/parse.zig b/src/check/parse.zig index 668c521efe..7c488cc31f 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -26,11 +26,19 @@ pub fn parse(env: *base.ModuleEnv, source: []const u8) IR { tokenizeReport(env.gpa, source, result.messages); } + for (result.messages) |msg| { + _ = env.problems.append(env.gpa, .{ .tokenize = msg }); + } + var parser = Parser.init(result.tokens); defer parser.deinit(); parser.parseFile(); + for 
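The extra space pushed for zero-argument lambdas in PATCH 19 is easier to see in isolation. A self-contained toy version of that branch (it works on plain strings rather than AST indices, so the function name and types here are invented for illustration):

    const std = @import("std");

    fn formatLambdaArgs(args: []const []const u8, out: *std.ArrayList(u8)) !void {
        try out.append('|');
        // Without this space a zero-argument lambda would print as "||",
        // which re-reads as the logical OR operator on the next parse.
        if (args.len == 0) try out.append(' ');
        for (args, 0..) |arg, i| {
            try out.appendSlice(arg);
            if (i < args.len - 1) try out.appendSlice(", ");
        }
        try out.append('|');
    }

This is why fuzz_crash_008 now formats "||1" as "| | 1".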
(parser.diagnostics.items) |msg| { + _ = env.problems.append(env.gpa, .{ .parser = msg }); + } + const errors = parser.diagnostics.toOwnedSlice(env.gpa) catch |err| exitOnOom(err); return .{ diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index a55227d38d..6cc1b7e9f4 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -60,20 +60,6 @@ pub const Diagnostic = struct { expr_if_missing_else, expr_no_space_dot_int, }; - - // TODO this is a hack just to get something in the snapshots... - pub fn not_terrible_error(self: Diagnostic, source: []const u8, writer: anytype) !void { - - // this is definitely not right... are these token indexes or source bytes? - const start: u32 = self.region.start; - const end: u32 = self.region.end; - const snippet = source[start..end]; - - try writer.writeAll("PARSE ERROR "); - try writer.writeAll(@tagName(self.tag)); - try writer.writeAll(snippet); - try writer.writeAll("\n"); - } }; /// The first and last token consumed by a Node diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index badc5abf87..baa566ac92 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -807,6 +807,7 @@ pub const Tokenizer = struct { self.cursor.pos += 1; return; } + std.debug.print("LAST {?}\nBRACE: {?}\n", .{ last, brace }); switch (last.?) { .round => { if (brace != .round) { @@ -1098,9 +1099,11 @@ pub const Tokenizer = struct { self.consumeBraceCloseAndContinueStringInterp(.round); }, ']' => { + std.debug.print("Closing SQUARE brace at position {}\n", .{self.cursor.pos}); self.consumeBraceCloseAndContinueStringInterp(.square); }, close_curly => { + std.debug.print("Closing CURLY brace at position {}\n", .{self.cursor.pos}); self.consumeBraceCloseAndContinueStringInterp(.curly); }, diff --git a/src/problem.zig b/src/problem.zig index 7a33406cab..b68b4146bb 100644 --- a/src/problem.zig +++ b/src/problem.zig @@ -6,6 +6,7 @@ //! our commitment to "always inform, never block" and to boost development speed. const std = @import("std"); +const Allocator = std.mem.Allocator; const base = @import("base.zig"); const collections = @import("collections.zig"); @@ -14,6 +15,8 @@ const Region = base.Region; /// Represents a problem encountered during the compilation process. pub const Problem = union(enum) { + tokenize: @import("check/parse/tokenize.zig").Diagnostic, + parser: @import("check/parse/IR.zig").Diagnostic, canonicalize: Canonicalize, compiler: Compiler, @@ -74,4 +77,30 @@ pub const Problem = union(enum) { pub const List = collections.SafeList(@This()); /// An index into a list of problems. pub const Idx = List.Idx; + + /// Format a `Problem` for display. + pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { + _ = fmt; + _ = options; + + // use a stack allocation for printing our tag errors... 
+ var buf: [1000]u8 = undefined; + + switch (self) { + .tokenize => |a| { + const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); + try writer.writeAll(str); + }, + .parser => |a| { + const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); + try writer.writeAll(str); + }, + .canonicalize => { + try writer.writeAll("CAN ERROR"); + }, + .compiler => { + try writer.writeAll("COMPILER ERROR"); + }, + } + } }; diff --git a/src/snapshot.zig b/src/snapshot.zig index f991e74570..0b678d6457 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -149,24 +149,24 @@ const Section = union(enum) { source, formatted, parse, - parser_errors, tokens, + problems, pub const META = "~~~META"; pub const SOURCE = "~~~SOURCE"; pub const FORMATTED = "~~~FORMATTED"; pub const PARSE = "~~~PARSE"; - pub const PARSE_ERRORS = "~~~PARSE_ERRORS"; pub const TOKENS = "~~~TOKENS"; + pub const PROBLEMS = "~~~PROBLEMS"; fn next(self: Section) ?Section { return switch (self) { .meta => .source, - .source => .formatted, + .source => .problems, + .problems => .formatted, .formatted => .tokens, .tokens => .parse, - .parse => .parser_errors, - .parser_errors => null, + .parse => .null, }; } @@ -175,8 +175,8 @@ const Section = union(enum) { if (std.mem.eql(u8, str, SOURCE)) return .source; if (std.mem.eql(u8, str, FORMATTED)) return .formatted; if (std.mem.eql(u8, str, PARSE)) return .parse; - if (std.mem.eql(u8, str, PARSE_ERRORS)) return .parser_errors; if (std.mem.eql(u8, str, TOKENS)) return .tokens; + if (std.mem.eql(u8, str, PROBLEMS)) return .problems; return null; } @@ -186,8 +186,8 @@ const Section = union(enum) { .source => SOURCE, .formatted => FORMATTED, .parse => PARSE, - .parser_errors => PARSE_ERRORS, .tokens => TOKENS, + .problems => PROBLEMS, .None => "", }; } @@ -335,6 +335,16 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try file.writer().writeAll(content.source); try file.writer().writeAll("\n"); + if (module_env.problems.len() > 0) { + try file.writer().writeAll(Section.PROBLEMS); + try file.writer().writeAll("\n"); + var iter = module_env.problems.iterIndices(); + while (iter.next()) |problem_idx| { + try module_env.problems.get(problem_idx).format("", .{}, file); + try file.writer().writeAll("\n"); + } + } + // Format the source code if (!has_parse_errors) { var formatter = fmt.init(parse_ast); @@ -376,12 +386,6 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try file.writer().writeAll(Section.PARSE); try file.writer().writeAll("\n"); try file.writer().writeAll(parse_buffer.items); - } else { - try file.writer().writeAll(Section.PARSE_ERRORS); - try file.writer().writeAll("\n"); - for (parse_ast.errors) |err| { - try err.not_terrible_error(content.source, file); - } } // If flag --fuzz-corpus is passed, so write the SOURCE to our corpus diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 767d8f7ec0..c5df7f3d8e 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -2,8 +2,8 @@ description=fuzz crash ~~~SOURCE mo|% +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.pattern_unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS LowerIdent,OpBar,OpPercent,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR pattern_unexpected_token -PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index e094b8d8d6..a3e4bda0c6 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -2,24 +2,24 
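The core of PATCH 20 is a single tagged union that can carry a diagnostic from any compiler stage and render it through one format method. A self-contained sketch of the same shape (the stage and tag names below are made up; the real union wraps the tokenizer and parser Diagnostic types as in the hunk above):

    const std = @import("std");

    const Problem = union(enum) {
        tokenize: enum { unclosed_string, mismatched_brace },
        parser: enum { unexpected_token, missing_header },

        pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void {
            _ = fmt;
            _ = options;
            switch (self) {
                .tokenize => |tag| try writer.print("TOKENIZE: {s}", .{@tagName(tag)}),
                .parser => |tag| try writer.print("PARSE: {s}", .{@tagName(tag)}),
            }
        }
    };

    pub fn main() void {
        const problem = Problem{ .parser = .unexpected_token };
        std.debug.print("{}\n", .{problem}); // prints: PARSE: unexpected_token
    }

The patch itself formats the wrapped tag with {} via bufPrint rather than @tagName, which is why the PROBLEMS sections show fully qualified names such as check.parse.IR.Diagnostic.Tag.unexpected_token.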
@@ description=fuzz crash ~~~SOURCE modu:;::::::::::::::le[% +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.ty_anno_unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR ty_anno_unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index d646ff97f6..4d5c3f3ce1 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -2,7 +2,8 @@ description=fuzz crash ~~~SOURCE = "te +~~~PROBLEMS +check.parse.tokenize.Diagnostic.Tag.UnclosedString +check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS OpAssign,StringStart,StringPart,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index d01e963885d4d1b1fceba4ac50264e95cf44c5b1..78a9c6d5c4f9499e35d60d7143722fc004ab6ed2 100644 GIT binary patch delta 116 zcmeysc#mmu=5OJ-tvUVd>&X0l#L ZV!EDVadKv+bADb)QGO1|8YUVX007AxE1v)W delta 9 Qcmcb|^nr1L$;3r_02R;#g#Z8m diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt index d0dd148704..c1c8b5a85e 100644 --- a/src/snapshots/fuzz_crash_007.txt +++ b/src/snapshots/fuzz_crash_007.txt @@ -2,9 +2,9 @@ description=fuzz crash ~~~SOURCE ff8.8.d +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.expr_no_space_dot_int +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS LowerIdent,NoSpaceDotInt,NoSpaceDotLowerIdent,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR expr_no_space_dot_int -PARSE ERROR unexpected_token -PARSE ERROR unexpected_token diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index 0c1e651528..fba539e192 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -2,6 +2,8 @@ description=fuzz crash ~~~SOURCE ||1 +~~~PROBLEMS +check.parse.tokenize.Diagnostic.Tag.AsciiControl ~~~FORMATTED | | 1 ~~~TOKENS diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt new file mode 100644 index 0000000000..75b782844c --- 
/dev/null +++ b/src/snapshots/fuzz_crash_009.txt @@ -0,0 +1,28 @@ +~~~META +description=fuzz crash +~~~SOURCE + f{o, + ] + +foo = + + "onmo % +~~~PROBLEMS +check.parse.tokenize.Diagnostic.Tag.MismatchedBrace +check.parse.tokenize.Diagnostic.Tag.UnclosedString +~~~FORMATTED +f + +{ o } + +foo = "onmo %" +~~~TOKENS +LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (ident '' 'f') + (record (field 'o')) + (decl + (ident 'foo') + (string 'onmo %'))) \ No newline at end of file diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt index 18f86d4fa3..95da7d2cd2 100644 --- a/src/snapshots/header_unexpected_token.txt +++ b/src/snapshots/header_unexpected_token.txt @@ -2,8 +2,8 @@ description= ~~~SOURCE module[% +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.header_unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS KwModule,OpenSquare,OpPercent,EndOfFile -~~~PARSE_ERRORS -PARSE ERROR header_unexpected_token -PARSE ERROR unexpected_token From 9472052117a91d484c7a733397a07eaad7ecb4df Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 14:46:56 +1100 Subject: [PATCH 21/40] minor cleanup --- src/check/parse/IR.zig | 17 ++--------------- src/fmt.zig | 7 ++++--- src/problem.zig | 2 +- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 6cc1b7e9f4..7a5853d21e 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -607,9 +607,7 @@ pub const NodeStore = struct { node.data.lhs = mod.exposes.span.start; node.data.rhs = mod.exposes.span.len; }, - else => { - // TODO -- should this be ignoring other header types?? - }, + else => {}, } const nid = store.nodes.append(store.gpa, node); return .{ .id = @intFromEnum(nid) }; @@ -1098,18 +1096,7 @@ pub const NodeStore = struct { } }; }, .malformed => { - // TODO -- what should we do here? - const reason: Diagnostic.Tag = @enumFromInt(node.data.lhs); - // switch (reason) { - // .missing_header => { - // // std.debug.print("MISSING HEADER: {}\n", .{reason}); - // return .malformed; - // }, - // else => { - // @panic("ASDFASDF"); - // }, - // } - return .{ .malformed = .{ .reason = reason } }; + return .{ .malformed = .{ .reason = @enumFromInt(node.data.lhs) } }; }, else => { std.debug.panic("Expected a valid header tag, got {s}", .{@tagName(node.tag)}); diff --git a/src/fmt.zig b/src/fmt.zig index e56ad80c95..59b0c34a7f 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -120,7 +120,6 @@ fn formatStatement(fmt: *Formatter, si: StatementIdx) NewlineBehavior { return .extra_newline_needed; }, .malformed => { - // TODO how should we format a malformed here? return .no_extra_newline; }, } @@ -233,6 +232,8 @@ fn formatExpr(fmt: *Formatter, ei: ExprIdx) void { // TODO -- this is a hack to avoid ambiguity with no arguments, // if we parse it again without the space it will be parsed as // a logical OR `||` instead + // + // desired behaviour described here https://roc.zulipchat.com/#narrow/channel/395097-compiler-development/topic/zig.20compiler.20-.20spike/near/504453049 if (arg_slice.len == 0) { fmt.pushAll(" "); } @@ -409,7 +410,7 @@ fn formatPattern(fmt: *Formatter, pi: PatternIdx) void { } }, .malformed => { - // TODO how should we format a malformed here? 
+ // format nothing for malformed patterns }, } } @@ -602,7 +603,7 @@ fn formatTypeAnno(fmt: *Formatter, anno: IR.NodeStore.TypeAnnoIdx) void { fmt.push('_'); }, .malformed => { - // TODO how should we format a malformed here? + // format nothing for malformed type annotations }, } } diff --git a/src/problem.zig b/src/problem.zig index b68b4146bb..75b1bec09f 100644 --- a/src/problem.zig +++ b/src/problem.zig @@ -83,7 +83,7 @@ pub const Problem = union(enum) { _ = fmt; _ = options; - // use a stack allocation for printing our tag errors... + // use a stack allocation for printing our tag errors var buf: [1000]u8 = undefined; switch (self) { From 57553035f3a3981d2b7cd72044c39ed47ac71678 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 15:06:14 +1100 Subject: [PATCH 22/40] minor cleanup --- src/check/parse/tokenize.zig | 3 --- src/snapshots/fuzz_crash_010.txt | 29 +++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 3 deletions(-) create mode 100644 src/snapshots/fuzz_crash_010.txt diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index baa566ac92..badc5abf87 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -807,7 +807,6 @@ pub const Tokenizer = struct { self.cursor.pos += 1; return; } - std.debug.print("LAST {?}\nBRACE: {?}\n", .{ last, brace }); switch (last.?) { .round => { if (brace != .round) { @@ -1099,11 +1098,9 @@ pub const Tokenizer = struct { self.consumeBraceCloseAndContinueStringInterp(.round); }, ']' => { - std.debug.print("Closing SQUARE brace at position {}\n", .{self.cursor.pos}); self.consumeBraceCloseAndContinueStringInterp(.square); }, close_curly => { - std.debug.print("Closing CURLY brace at position {}\n", .{self.cursor.pos}); self.consumeBraceCloseAndContinueStringInterp(.curly); }, diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt new file mode 100644 index 0000000000..16b8b525ff --- /dev/null +++ b/src/snapshots/fuzz_crash_010.txt @@ -0,0 +1,29 @@ +~~~META +description=fuzz crash +~~~SOURCE +H{o, +  ] + +foo = + + "on (string 'onmo %'))) +~~~PROBLEMS +check.parse.tokenize.Diagnostic.Tag.AsciiControl +check.parse.tokenize.Diagnostic.Tag.MismatchedBrace +check.parse.tokenize.Diagnostic.Tag.UnclosedString +~~~FORMATTED +H + +{ o } + +foo = "on (string 'onmo %')))" +~~~TOKENS +UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (tag 'H') + (record (field 'o')) + (decl + (ident 'foo') + (string 'on (string 'onmo %')))'))) \ No newline at end of file From 0961098d917d9f4f90f8f932893703185518c065 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 16:49:52 +1100 Subject: [PATCH 23/40] snapshot and parser fixes --- src/check/parse.zig | 14 +-- src/check/parse/IR.zig | 39 ++++++++- src/check/parse/Parser.zig | 47 +++++----- src/fmt.zig | 3 + src/snapshot.zig | 99 ++++++++++++---------- src/snapshots/001.txt | 16 ++-- src/snapshots/003.txt | 10 ++- src/snapshots/fuzz_crash_001.txt | 10 +++ src/snapshots/fuzz_crash_002.txt | 44 ++++++++++ src/snapshots/fuzz_crash_003.txt | 9 ++ src/snapshots/fuzz_crash_004.txt | 7 +- src/snapshots/fuzz_crash_005.txt | 7 +- src/snapshots/fuzz_crash_006.txt | Bin 350 -> 357 bytes src/snapshots/fuzz_crash_007.txt | 11 +++ src/snapshots/fuzz_crash_008.txt | 7 +- src/snapshots/fuzz_crash_009.txt | 15 ++-- src/snapshots/fuzz_crash_010.txt | 15 ++-- src/snapshots/header_unexpected_token.txt | 7 ++ 
src/snapshots/some_folder/002.txt | 10 ++- 19 files changed, 263 insertions(+), 107 deletions(-) diff --git a/src/check/parse.zig b/src/check/parse.zig index 7c488cc31f..df33215969 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -22,9 +22,10 @@ pub fn parse(env: *base.ModuleEnv, source: []const u8) IR { tokenizer.tokenize(); const result = tokenizer.finishAndDeinit(); - if (result.messages.len > 0) { - tokenizeReport(env.gpa, source, result.messages); - } + // TODO I think we should remove this... it's always printing to stderr + // if (result.messages.len > 0) { + // tokenizeReport(env.gpa, source, result.messages); + // } for (result.messages) |msg| { _ = env.problems.append(env.gpa, .{ .tokenize = msg }); @@ -68,6 +69,9 @@ fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { return lineno; } +/// TODO -- I think we should change this to be a method on Diagnostic +/// and then we can have the caller use this to format to a writer +/// this would be helpful for e.g. the snapshot tool which writes to a file instead of stderr fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []const tokenize.Diagnostic) void { std.debug.print("Found the {d} following issues while parsing:\n", .{msgs.len}); var newlines = std.ArrayList(usize).init(allocator); @@ -101,12 +105,12 @@ fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []cons } std.debug.print( - "({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", + "TOKENIZE ERROR: ({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", .{ start_line_num, start_col, end_line_num, end_col, src, spaces.items }, ); }, else => { - std.debug.print("MSG: {any}", .{message}); + std.debug.print("TOKENIZE ERROR: {any}\n", .{message}); }, } } diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 7a5853d21e..d57ed07ca3 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -56,6 +56,7 @@ pub const Diagnostic = struct { pattern_unexpected_token, ty_anno_unexpected_token, statement_unexpected_eof, + statement_unexpected_token, string_unexpected_token, expr_if_missing_else, expr_no_space_dot_int, @@ -607,6 +608,11 @@ pub const NodeStore = struct { node.data.lhs = mod.exposes.span.start; node.data.rhs = mod.exposes.span.len; }, + .malformed => |a| { + node.tag = .malformed; + node.data.lhs = @intFromEnum(a.reason); + node.data.rhs = 0; + }, else => {}, } const nid = store.nodes.append(store.gpa, node); @@ -1427,6 +1433,11 @@ pub const NodeStore = struct { .region = emptyRegion(), } }; }, + .malformed => { + return .{ .malformed = .{ + .reason = @enumFromInt(node.data.lhs), + } }; + }, else => { std.debug.panic("Expected a valid expr tag, got {s}", .{@tagName(node.tag)}); }, @@ -1549,6 +1560,9 @@ pub const NodeStore = struct { .anno = .{ .id = node.data.lhs }, } }; }, + .malformed => { + return .{ .malformed = .{ .reason = @enumFromInt(node.data.lhs) } }; + }, else => { std.debug.panic("Expected a valid type annotation node, found {s}", .{@tagName(node.tag)}); }, @@ -1755,8 +1769,17 @@ pub const NodeStore = struct { return import_node; }, + .type_anno => |a| { + var node = sexpr.Expr.init(env.gpa, "type_anno"); + node.appendStringChild(env.gpa, ir.resolve(a.name)); + + var anno = ir.store.getTypeAnno(a.anno).toSExpr(env, ir); + node.appendNodeChild(env.gpa, &anno); + + return node; + }, else => { - std.debug.print("format for statement {}", .{self}); + std.debug.print("\n\nERROR toSExpr for Statement: {}\n", .{self}); @panic("not implemented"); 
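The tokenize report above turns raw byte offsets into line and column numbers by scanning a list of newline offsets (the lineNum helper named in the hunk context). For orientation, a self-contained sketch of that kind of lookup; the name lineForPos and the exact counting are assumptions, and the real lineNum in parse.zig may differ:

    const std = @import("std");

    /// Count how many newline offsets fall before pos; that count is the 0-based line.
    fn lineForPos(newline_offsets: []const usize, pos: usize) usize {
        var line: usize = 0;
        for (newline_offsets) |off| {
            if (off >= pos) break;
            line += 1;
        }
        return line;
    }

    test "lineForPos" {
        const newlines = [_]usize{ 5, 12 };
        try std.testing.expectEqual(@as(usize, 0), lineForPos(&newlines, 3));
        try std.testing.expectEqual(@as(usize, 2), lineForPos(&newlines, 20));
    }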
}, } @@ -1809,6 +1832,20 @@ pub const NodeStore = struct { }, const TagUnionRhs = packed struct { open: u1, tags_len: u31 }; + + pub fn toSExpr(self: @This(), env: *base.ModuleEnv, ir: *IR) sexpr.Expr { + _ = ir; + switch (self) { + .malformed => |a| { + var node = sexpr.Expr.init(env.gpa, "malformed"); + node.appendStringChild(env.gpa, @tagName(a.reason)); + return node; + }, + else => { + std.debug.panic("TODO toSExpr for TypeAnno: {}", .{self}); + }, + } + } }; pub const AnnoRecordField = struct { diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 0efaed211d..41a68ac543 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -229,8 +229,10 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { }); return self.store.addHeader(.{ .malformed = .{ .reason = reason } }); } + self.advance(); const scratch_top = self.store.scratchTokenTop(); + while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { if (self.peek() != .LowerIdent and self.peek() != .UpperIdent) { // std.debug.panic("TODO: Handler header bad exposes contents: {s}", .{@tagName(self.peek())}); @@ -482,18 +484,10 @@ pub fn parseStmt(self: *Parser) ?IR.NodeStore.StatementIdx { } }); return statement_idx; } else { - // If not a decl - const expr = self.parseExpr(); - const statement_idx = self.store.addStatement(.{ .expr = .{ - .expr = expr, - .region = .{ .start = start, .end = start }, - } }); - if (self.peek() == .Newline) { - self.advance(); - } - return statement_idx; + // continue to parse final expression } }, + // Expect to parse a Type Annotation, e.g. `Foo a : (a,a)` .UpperIdent => { const start = self.pos; if (self.peekNext() == .OpColon or self.peekNext() == .LowerIdent) { @@ -509,27 +503,24 @@ pub fn parseStmt(self: *Parser) ?IR.NodeStore.StatementIdx { .region = .{ .start = start, .end = self.pos }, } }); return statement_idx; + } else { + // continue to parse final expression } - const expr = self.parseExpr(); - const statement_idx = self.store.addStatement(.{ .expr = .{ - .expr = expr, - .region = .{ .start = start, .end = self.pos }, - } }); - return statement_idx; - }, - else => { - const start = self.pos; - const expr = self.parseExpr(); - const statement_idx = self.store.addStatement(.{ .expr = .{ - .expr = expr, - .region = .{ .start = start, .end = self.pos }, - } }); - if (self.peek() == .Newline) { - self.advance(); - } - return statement_idx; }, + else => {}, + } + + // We didn't find any statements, so we must be parsing the final expression. 
+ const start = self.pos; + const expr = self.parseExpr(); + const statement_idx = self.store.addStatement(.{ .expr = .{ + .expr = expr, + .region = .{ .start = start, .end = self.pos }, + } }); + if (self.peek() == .Newline) { + self.advance(); } + return statement_idx; } /// Whether Pattern Alternatives are allowed in the current context diff --git a/src/fmt.zig b/src/fmt.zig index 59b0c34a7f..ef68f10aef 100644 --- a/src/fmt.zig +++ b/src/fmt.zig @@ -308,6 +308,9 @@ fn formatExpr(fmt: *Formatter, ei: ExprIdx) void { .ellipsis => |_| { fmt.pushAll("..."); }, + .malformed => { + // format nothing for malformed expressions + }, else => { std.debug.panic("TODO: Handle formatting {s}", .{@tagName(expr)}); }, diff --git a/src/snapshot.zig b/src/snapshot.zig index 0b678d6457..122696e3e2 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -297,10 +297,6 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor } }; - // Generate the PARSE section - var parse_buffer = std.ArrayList(u8).init(gpa); - defer parse_buffer.deinit(); - var module_env = base.ModuleEnv.init(gpa); defer module_env.deinit(); @@ -311,58 +307,46 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor // shouldn't be required in future parse_ast.store.emptyScratch(); - const has_parse_errors = parse_ast.errors.len > 0; - - // Write the new AST to the parse section - if (!has_parse_errors) { - try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); - } - - // Rewrite the file with updated sections + // Overwrite the snapshot file var file = std.fs.cwd().createFile(snapshot_path, .{}) catch |err| { log("failed to create file '{s}': {s}", .{ snapshot_path, @errorName(err) }); return false; }; defer file.close(); - try file.writer().writeAll(Section.META); - try file.writer().writeAll("\n"); - try file.writer().writeAll(content.meta); - try file.writer().writeAll("\n"); - - try file.writer().writeAll(Section.SOURCE); - try file.writer().writeAll("\n"); - try file.writer().writeAll(content.source); - try file.writer().writeAll("\n"); - - if (module_env.problems.len() > 0) { - try file.writer().writeAll(Section.PROBLEMS); + // Copy original META + { + try file.writer().writeAll(Section.META); + try file.writer().writeAll("\n"); + try file.writer().writeAll(content.meta); try file.writer().writeAll("\n"); - var iter = module_env.problems.iterIndices(); - while (iter.next()) |problem_idx| { - try module_env.problems.get(problem_idx).format("", .{}, file); - try file.writer().writeAll("\n"); - } } - // Format the source code - if (!has_parse_errors) { - var formatter = fmt.init(parse_ast); - defer formatter.deinit(); - const formatted = formatter.formatFile(); - defer gpa.free(formatted); + // Copy original SOURCE + { + try file.writer().writeAll(Section.SOURCE); + try file.writer().writeAll("\n"); + try file.writer().writeAll(content.source); + try file.writer().writeAll("\n"); + } - if (!std.mem.eql(u8, formatted, content.source)) { - try file.writer().writeAll(Section.FORMATTED); - try file.writer().writeAll("\n"); - try file.writer().writeAll(formatted); - try file.writer().writeAll("\n"); + // Write out any PROBLEMS + { + try file.writer().writeAll(Section.PROBLEMS); + try file.writer().writeAll("\n"); + if (module_env.problems.len() > 0) { + var iter = module_env.problems.iterIndices(); + while (iter.next()) |problem_idx| { + try module_env.problems.get(problem_idx).format("", .{}, file); + try file.writer().writeAll("\n"); + } + } else { + try 
file.writer().writeAll("NIL\n"); } } - // Check if tokens should be included - const exclude_tokens = std.mem.indexOf(u8, content.meta, "exclude_tokens=true") != null; - if (!exclude_tokens) { + // Write out any TOKENS + { try file.writer().writeAll(Section.TOKENS); try file.writer().writeAll("\n"); const tokenizedBuffer = parse_ast.tokens; @@ -382,12 +366,38 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try file.writer().writeAll("\n"); } - if (!has_parse_errors) { + // Write PARSE SECTION + { + var parse_buffer = std.ArrayList(u8).init(gpa); + defer parse_buffer.deinit(); + try parse_ast.toSExprStr(&module_env, parse_buffer.writer().any()); try file.writer().writeAll(Section.PARSE); try file.writer().writeAll("\n"); try file.writer().writeAll(parse_buffer.items); + try file.writer().writeAll("\n"); + } + + // Write FORMAT SECTION + { + var formatter = fmt.init(parse_ast); + defer formatter.deinit(); + const formatted = formatter.formatFile(); + defer gpa.free(formatted); + + try file.writer().writeAll(Section.FORMATTED); + try file.writer().writeAll("\n"); + + if (!std.mem.eql(u8, formatted, content.source)) { + try file.writer().writeAll(formatted); + try file.writer().writeAll("\n"); + } else { + try file.writer().writeAll("NO CHANGE"); + try file.writer().writeAll("\n"); + } } + try file.writer().writeAll("~~~END"); + // If flag --fuzz-corpus is passed, so write the SOURCE to our corpus if (maybe_fuzz_corpus_path != null) { @@ -417,6 +427,7 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor try corpus_file.writer().writeAll(content.source); } + // Log the file path that was written to log("{s}", .{snapshot_path}); return true; diff --git a/src/snapshots/001.txt b/src/snapshots/001.txt index 9cb231d476..1b22aab7e4 100644 --- a/src/snapshots/001.txt +++ b/src/snapshots/001.txt @@ -1,6 +1,5 @@ ~~~META description=Example to develop the snapshot methodology, includes FORMATTED section -exclude_tokens=true ~~~SOURCE module [ # some crazy formatting @@ -10,13 +9,18 @@ module [ foo = "one" -~~~FORMATTED -module [foo] - -foo = "one" +~~~PROBLEMS +NIL +~~~TOKENS +KwModule,OpenSquare,Newline,LowerIdent,Comma,Newline,CloseSquare,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,StringEnd,EndOfFile ~~~PARSE (file (header 'foo') (decl (ident 'foo') - (string 'one'))) \ No newline at end of file + (string 'one'))) +~~~FORMATTED +module [foo] + +foo = "one" +~~~END \ No newline at end of file diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index d2e8292ad8..9f7ba842fc 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -1,12 +1,15 @@ ~~~META description=Import statement -exclude_tokens=true ~~~SOURCE module [decoder] import json.Json decoder = Utf8.decode +~~~PROBLEMS +NIL +~~~TOKENS +KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,KwImport,LowerIdent,NoSpaceDotUpperIdent,Newline,LowerIdent,OpAssign,UpperIdent,NoSpaceDotLowerIdent,EndOfFile ~~~PARSE (file (header 'decoder') @@ -16,4 +19,7 @@ decoder = Utf8.decode '') (decl (ident 'decoder') - (ident 'Utf8' '.decode'))) \ No newline at end of file + (ident 'Utf8' '.decode'))) +~~~FORMATTED +NO CHANGE +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index c5df7f3d8e..a4d5628e37 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -7,3 +7,13 @@ check.parse.IR.Diagnostic.Tag.pattern_unexpected_token check.parse.IR.Diagnostic.Tag.unexpected_token 
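After PATCH 23 every snapshot file is rewritten with the same fixed sequence of sections. The section names and order below come straight from the writer above; the parenthetical placeholders are mine, not real snapshot content:

    ~~~META
    (description of this snapshot)
    ~~~SOURCE
    (the Roc source under test, copied through unchanged)
    ~~~PROBLEMS
    (one line per tokenizer or parser problem, or NIL)
    ~~~TOKENS
    (comma-separated token tags, ending in EndOfFile)
    ~~~PARSE
    (the AST rendered as an S-expression)
    ~~~FORMATTED
    (formatter output, or NO CHANGE when it matches SOURCE)
    ~~~END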
~~~TOKENS LowerIdent,OpBar,OpPercent,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (ident '' 'mo') + (malformed 'unexpected_token')) +~~~FORMATTED +mo + + +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index a3e4bda0c6..39d42127f3 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -23,3 +23,47 @@ check.parse.IR.Diagnostic.Tag.unexpected_token check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (type_anno + 'modu' + (malformed 'ty_anno_unexpected_token')) + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (malformed 'unexpected_token') + (ident '' 'le') + (malformed 'unexpected_token')) +~~~FORMATTED +modu : + + + + + + + + + + + + + + + +le + + +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index 4d5c3f3ce1..b981f0b015 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -7,3 +7,12 @@ check.parse.tokenize.Diagnostic.Tag.UnclosedString check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS OpAssign,StringStart,StringPart,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (malformed 'unexpected_token') + (string 'te')) +~~~FORMATTED + +"te" +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index dfd6670eb3..2e71077a16 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -2,9 +2,14 @@ description=fuzz crash ~~~SOURCE F +~~~PROBLEMS +NIL ~~~TOKENS UpperIdent,EndOfFile ~~~PARSE (file (malformed 'missing_header') - (tag 'F')) \ No newline at end of file + (tag 'F')) +~~~FORMATTED +NO CHANGE +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index f3e2ff7a49..9a56d7a706 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -2,9 +2,14 @@ description=fuzz crash ~~~SOURCE modu +~~~PROBLEMS +NIL ~~~TOKENS LowerIdent,EndOfFile ~~~PARSE (file (malformed 'missing_header') - (ident '' 'modu')) \ No newline at end of file + (ident '' 'modu')) +~~~FORMATTED +NO CHANGE +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 78a9c6d5c4f9499e35d60d7143722fc004ab6ed2..c3ee46939961f28796cd2518f4fa4fec632f31ed 100644 GIT binary patch delta 16 Ycmcb|^pt7BqKP|~PMjyr?CR$N07Ik)4FCWD delta 29 lcmaFLbdPDmB4IcGAYaFj5LXv24}UJM8ig8#vWfFn0sxj>3GM&@ diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt index c1c8b5a85e..634d3218f4 100644 --- a/src/snapshots/fuzz_crash_007.txt +++ b/src/snapshots/fuzz_crash_007.txt @@ -8,3 +8,14 @@ check.parse.IR.Diagnostic.Tag.unexpected_token check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS 
LowerIdent,NoSpaceDotInt,NoSpaceDotLowerIdent,EndOfFile +~~~PARSE +(file + (malformed 'missing_header') + (malformed 'expr_no_space_dot_int') + (malformed 'unexpected_token') + (malformed 'unexpected_token')) +~~~FORMATTED + + + +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index fba539e192..e2c888ec2c 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -4,8 +4,6 @@ description=fuzz crash ||1 ~~~PROBLEMS check.parse.tokenize.Diagnostic.Tag.AsciiControl -~~~FORMATTED -| | 1 ~~~TOKENS OpBar,OpBar,Int,EndOfFile ~~~PARSE @@ -13,4 +11,7 @@ OpBar,OpBar,Int,EndOfFile (malformed 'missing_header') (lambda (args) - (int '1'))) \ No newline at end of file + (int '1'))) +~~~FORMATTED +| | 1 +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt index 75b782844c..824043c618 100644 --- a/src/snapshots/fuzz_crash_009.txt +++ b/src/snapshots/fuzz_crash_009.txt @@ -10,12 +10,6 @@ foo = ~~~PROBLEMS check.parse.tokenize.Diagnostic.Tag.MismatchedBrace check.parse.tokenize.Diagnostic.Tag.UnclosedString -~~~FORMATTED -f - -{ o } - -foo = "onmo %" ~~~TOKENS LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE @@ -25,4 +19,11 @@ LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAs (record (field 'o')) (decl (ident 'foo') - (string 'onmo %'))) \ No newline at end of file + (string 'onmo %'))) +~~~FORMATTED +f + +{ o } + +foo = "onmo %" +~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt index 16b8b525ff..375bb5acf5 100644 --- a/src/snapshots/fuzz_crash_010.txt +++ b/src/snapshots/fuzz_crash_010.txt @@ -11,12 +11,6 @@ foo = check.parse.tokenize.Diagnostic.Tag.AsciiControl check.parse.tokenize.Diagnostic.Tag.MismatchedBrace check.parse.tokenize.Diagnostic.Tag.UnclosedString -~~~FORMATTED -H - -{ o } - -foo = "on (string 'onmo %')))" ~~~TOKENS UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE @@ -26,4 +20,11 @@ UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAs (record (field 'o')) (decl (ident 'foo') - (string 'on (string 'onmo %')))'))) \ No newline at end of file + (string 'on (string 'onmo %')))'))) +~~~FORMATTED +H + +{ o } + +foo = "on (string 'onmo %')))" +~~~END \ No newline at end of file diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt index 95da7d2cd2..8bc5d70b6e 100644 --- a/src/snapshots/header_unexpected_token.txt +++ b/src/snapshots/header_unexpected_token.txt @@ -7,3 +7,10 @@ check.parse.IR.Diagnostic.Tag.header_unexpected_token check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS KwModule,OpenSquare,OpPercent,EndOfFile +~~~PARSE +(file + (malformed 'header_unexpected_token') + (malformed 'unexpected_token')) +~~~FORMATTED + +~~~END \ No newline at end of file diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 2be28c6d80..4c8d443726 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -1,12 +1,15 @@ ~~~META description=Basic example to develop the snapshot methodology -exclude_tokens=true ~~~SOURCE module [foo, bar] foo = "one" bar = "two" +~~~PROBLEMS +NIL +~~~TOKENS 
+KwModule,OpenSquare,LowerIdent,Comma,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,StringStart,StringPart,StringEnd,Newline,LowerIdent,OpAssign,StringStart,StringPart,StringEnd,EndOfFile ~~~PARSE (file (header 'foo' 'bar') @@ -15,4 +18,7 @@ bar = "two" (string 'one')) (decl (ident 'bar') - (string 'two'))) \ No newline at end of file + (string 'two'))) +~~~FORMATTED +NO CHANGE +~~~END \ No newline at end of file From 634dfb19723e7620571d697fcbec816607c13d6e Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Mon, 10 Mar 2025 17:14:06 +1100 Subject: [PATCH 24/40] don't change the old builtins --- crates/compiler/builtins/bitcode/src/list.zig | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/crates/compiler/builtins/bitcode/src/list.zig b/crates/compiler/builtins/bitcode/src/list.zig index bc1b219b94..b461dd74ce 100644 --- a/crates/compiler/builtins/bitcode/src/list.zig +++ b/crates/compiler/builtins/bitcode/src/list.zig @@ -517,9 +517,9 @@ pub fn listSwap( const source_ptr = @as([*]u8, @ptrCast(newList.bytes)); swapElements(source_ptr, element_width, @as(usize, - // We already verified that both indices are less than the stored list length, - // which is usize, so casting them to usize will definitely be lossless. - @intCast(index_1)), @as(usize, @intCast(index_2)), copy); + // We already verified that both indices are less than the stored list length, + // which is usize, so casting them to usize will definitely be lossless. + @intCast(index_1)), @as(usize, @intCast(index_2)), copy); return newList; } From 77062990407032f7bb1f64d27141e10c187e0fc1 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 10:39:21 +1100 Subject: [PATCH 25/40] use addMalformed for header_expected_open_bracket --- src/check/parse/IR.zig | 5 ----- src/check/parse/Parser.zig | 8 +------- src/coordinate.zig | 1 - src/snapshots/header_expected_open_bracket.txt | 13 +++++++++++++ src/snapshots/header_unexpected_token.txt | 14 -------------- 5 files changed, 14 insertions(+), 27 deletions(-) create mode 100644 src/snapshots/header_expected_open_bracket.txt delete mode 100644 src/snapshots/header_unexpected_token.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 1481844e6d..87c0738dac 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -634,11 +634,6 @@ pub const NodeStore = struct { node.data.lhs = mod.exposes.span.start; node.data.rhs = mod.exposes.span.len; }, - .malformed => |a| { - node.tag = .malformed; - node.data.lhs = @intFromEnum(a.reason); - node.data.rhs = 0; - }, else => {}, } const nid = store.nodes.append(store.gpa, node); diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 98889cc854..14e6dab698 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -220,13 +220,7 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { // Get exposes self.expect(.OpenSquare) catch { - // std.debug.panic("TODO: Handle header with no exposes open bracket: {s}", .{@tagName(self.peek())}); - const reason: IR.Diagnostic.Tag = .header_expected_open_bracket; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addHeader(.{ .malformed = .{ .reason = reason } }); + return self.store.addMalformed(IR.NodeStore.HeaderIdx, .header_expected_open_bracket, self.pos); }; const scratch_top = self.store.scratchExposedItemTop(); self.parseCollectionSpan(IR.NodeStore.ExposedItemIdx, .CloseSquare, IR.NodeStore.addScratchExposedItem, Parser.parseExposedItem) catch { diff 
--git a/src/coordinate.zig b/src/coordinate.zig index e6d4dc07b9..cf2a97579c 100644 --- a/src/coordinate.zig +++ b/src/coordinate.zig @@ -508,7 +508,6 @@ fn parseDependenciesFromPackageRoot( .len = 0, } }, .malformed => { - // @Sam -- does this look right? return ParsePackageDepsErr.malformed_header; }, }; diff --git a/src/snapshots/header_expected_open_bracket.txt b/src/snapshots/header_expected_open_bracket.txt new file mode 100644 index 0000000000..066ab3a87b --- /dev/null +++ b/src/snapshots/header_expected_open_bracket.txt @@ -0,0 +1,13 @@ +~~~META +description= +~~~SOURCE +module +~~~PROBLEMS +NIL +~~~TOKENS +KwModule,EndOfFile +~~~PARSE +(file (malformed 'header_expected_open_bracket')) +~~~FORMATTED + +~~~END \ No newline at end of file diff --git a/src/snapshots/header_unexpected_token.txt b/src/snapshots/header_unexpected_token.txt deleted file mode 100644 index 67277f7366..0000000000 --- a/src/snapshots/header_unexpected_token.txt +++ /dev/null @@ -1,14 +0,0 @@ -~~~META -description= -~~~SOURCE -module[% -~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.import_exposing_no_close -~~~TOKENS -KwModule,OpenSquare,OpPercent,EndOfFile -~~~PARSE -(file (malformed 'import_exposing_no_close')) -~~~FORMATTED - -~~~END \ No newline at end of file From edee4babe06147d7f5a20c30e071e867f6c9077d Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 10:47:14 +1100 Subject: [PATCH 26/40] cleanup use of malformed --- src/check/parse/IR.zig | 41 ++++++++---------- src/check/parse/Parser.zig | 18 +++----- src/snapshots/fuzz_crash_001.txt | 5 +-- src/snapshots/fuzz_crash_002.txt | 36 +++++++-------- src/snapshots/fuzz_crash_003.txt | 2 +- src/snapshots/fuzz_crash_004.txt | 2 +- src/snapshots/fuzz_crash_005.txt | 2 +- src/snapshots/fuzz_crash_006.txt | Bin 383 -> 390 bytes src/snapshots/fuzz_crash_007.txt | 6 +-- src/snapshots/fuzz_crash_008.txt | 4 +- src/snapshots/fuzz_crash_009.txt | 2 +- src/snapshots/fuzz_crash_010.txt | 2 +- .../header_expected_open_bracket.txt | 2 +- 13 files changed, 54 insertions(+), 68 deletions(-) diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 87c0738dac..0116d8747c 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -634,6 +634,9 @@ pub const NodeStore = struct { node.data.lhs = mod.exposes.span.start; node.data.rhs = mod.exposes.span.len; }, + .malformed => { + @panic("use addMalformed instead"); + }, else => {}, } const nid = store.nodes.append(store.gpa, node); @@ -749,10 +752,8 @@ pub const NodeStore = struct { node.data.lhs = a.name; node.data.rhs = a.anno.id; }, - .malformed => |m| { - node.tag = .malformed; - node.data.lhs = @intFromEnum(m.reason); - node.data.rhs = 0; + .malformed => { + @panic("use addMalformed instead"); }, } const nid = store.nodes.append(store.gpa, node); @@ -818,10 +819,8 @@ pub const NodeStore = struct { node.data.lhs = a.patterns.span.start; node.data.rhs = a.patterns.span.len; }, - .malformed => |a| { - node.tag = .malformed; - node.data.lhs = @intFromEnum(a.reason); - node.data.rhs = 0; + .malformed => { + @panic("use addMalformed instead"); }, } const nid = store.nodes.append(store.gpa, node); @@ -948,10 +947,8 @@ pub const NodeStore = struct { .ellipsis => |_| { node.tag = .ellipsis; }, - .malformed => |m| { - node.tag = .malformed; - node.data.lhs = @intFromEnum(m.reason); - node.data.rhs = 0; + .malformed => { + @panic("use addMalformed instead"); }, } const nid = store.nodes.append(store.gpa, node); @@ -1110,10 +1107,8 @@ pub const NodeStore = 
struct { node.tag = .ty_parens; node.data.lhs = p.anno.id; }, - .malformed => |a| { - node.tag = .malformed; - node.data.lhs = @intFromEnum(a.reason); - node.data.rhs = 0; + .malformed => { + @panic("use addMalformed instead"); }, } @@ -1548,9 +1543,7 @@ pub const NodeStore = struct { } }; }, .malformed => { - return .{ .malformed = .{ - .reason = @enumFromInt(node.data.lhs), - } }; + return .{ .malformed = .{ .reason = @enumFromInt(node.data.lhs) } }; }, else => { std.debug.panic("Expected a valid expr tag, got {s}", .{@tagName(node.tag)}); @@ -1781,7 +1774,7 @@ pub const NodeStore = struct { return header_node; }, .malformed => |a| { - var node = sexpr.Expr.init(env.gpa, "malformed"); + var node = sexpr.Expr.init(env.gpa, "malformed_header"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, @@ -2116,7 +2109,7 @@ pub const NodeStore = struct { return ir.store.getTypeAnno(a.anno).toSExpr(env, ir); }, .malformed => |a| { - var node = sexpr.Expr.init(env.gpa, "malformed"); + var node = sexpr.Expr.init(env.gpa, "malformed_expr"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, @@ -2187,7 +2180,7 @@ pub const NodeStore = struct { return node; }, .malformed => |a| { - var node = sexpr.Expr.init(env.gpa, "malformed"); + var node = sexpr.Expr.init(env.gpa, "malformed_pattern"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, @@ -2347,9 +2340,9 @@ pub const NodeStore = struct { } return node; }, - // (malformed ) + // (malformed_expr ) .malformed => |a| { - var node = sexpr.Expr.init(env.gpa, "malformed"); + var node = sexpr.Expr.init(env.gpa, "malformed_expr"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 14e6dab698..d26d426e3b 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -612,7 +612,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt self.advance(); } self.store.clearScratchPatternsFrom(scratch_top); - return self.pushMalformed(IR.NodeStore.PatternIdx, .unexpected_token); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); }; const args = self.store.patternSpanFrom(scratch_top); pattern = self.store.addPattern(.{ .tag = .{ @@ -650,7 +650,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt self.advance(); } self.store.clearScratchPatternsFrom(scratch_top); - return self.pushMalformed(IR.NodeStore.PatternIdx, .unexpected_token); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); }; const patterns = self.store.patternSpanFrom(scratch_top); @@ -671,7 +671,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt } const fields = self.store.patternRecordFieldSpanFrom(scratch_top); if (self.peek() != .CloseCurly) { - return self.pushMalformed(IR.NodeStore.PatternIdx, .unexpected_token); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); } self.advance(); pattern = self.store.addPattern(.{ .record = .{ @@ -686,7 +686,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt if (self.peek() == .KwAs) { self.advance(); if (self.peek() != .LowerIdent) { - return self.pushMalformed(IR.NodeStore.PatternIdx, .unexpected_token); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); } name = self.pos; end = self.pos; @@ -711,7 +711,7 @@ pub fn parsePattern(self: *Parser, alternatives: 
Alternatives) IR.NodeStore.Patt self.advance(); } self.store.clearScratchPatternsFrom(scratch_top); - return self.pushMalformed(IR.NodeStore.PatternIdx, .unexpected_token); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); }; const patterns = self.store.patternSpanFrom(scratch_top); @@ -721,13 +721,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt } }); }, else => { - // std.debug.panic("TODO: Handle parsing pattern starting with: {s}", .{@tagName(self.peek())}) - const reason: IR.Diagnostic.Tag = .pattern_unexpected_token; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addPattern(.{ .malformed = .{ .reason = reason } }); + return self.store.addMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token, self.pos); }, } diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 6698af6e54..3833330e7f 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -4,14 +4,13 @@ description=fuzz crash mo|% ~~~PROBLEMS check.parse.IR.Diagnostic.Tag.missing_header -check.parse.IR.Diagnostic.Tag.pattern_unexpected_token check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS LowerIdent,OpBar,OpPercent,EndOfFile ~~~PARSE (file - (malformed 'missing_header') - (malformed 'unexpected_token')) + (malformed_header 'missing_header') + (malformed_expr 'unexpected_token')) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index 683a3a8b20..b7c0c937ab 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -26,25 +26,25 @@ check.parse.IR.Diagnostic.Tag.unexpected_token LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile ~~~PARSE (file - (malformed 'missing_header') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') - (malformed 'unexpected_token') + (malformed_header 'missing_header') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') + (malformed_expr 'unexpected_token') (ident '' 'le') - (malformed 'unexpected_token')) + (malformed_expr 'unexpected_token')) ~~~FORMATTED diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index e9d6e08092..4aa3d0e6a1 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -9,7 +9,7 @@ check.parse.IR.Diagnostic.Tag.missing_header 
OpAssign,StringStart,StringPart,EndOfFile ~~~PARSE (file - (malformed 'missing_header') + (malformed_header 'missing_header') (string 'te')) ~~~FORMATTED "te" diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index f988befd21..069c052a5f 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -7,7 +7,7 @@ check.parse.IR.Diagnostic.Tag.missing_header ~~~TOKENS UpperIdent,EndOfFile ~~~PARSE -(file (malformed 'missing_header')) +(file (malformed_header 'missing_header')) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index bcf8d6b92f..d23f9ad827 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -7,7 +7,7 @@ check.parse.IR.Diagnostic.Tag.missing_header ~~~TOKENS LowerIdent,EndOfFile ~~~PARSE -(file (malformed 'missing_header')) +(file (malformed_header 'missing_header')) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 5ad1085487187732a09f91159cb2573b6b3c84e0..0eecf855f5e8b51b4cb6617bf5cb98e81f39068c 100644 GIT binary patch delta 19 acmey*)W*C)o{>F1BQ-H4wP> Date: Tue, 11 Mar 2025 13:07:37 +1100 Subject: [PATCH 27/40] malformed for expr_if_missing_else --- src/check/parse/Parser.zig | 8 +------- src/snapshots/expr_if_missing_else.txt | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+), 7 deletions(-) create mode 100644 src/snapshots/expr_if_missing_else.txt diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index d26d426e3b..49adef96bf 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -947,13 +947,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { const condition = self.parseExpr(); const then = self.parseExpr(); if (self.peek() != .KwElse) { - // std.debug.panic("TODO: problem for no else {s}@{d}", .{ @tagName(self.peek()), self.pos }); - const reason: IR.Diagnostic.Tag = .expr_if_missing_else; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); + return self.store.addMalformed(IR.NodeStore.ExprIdx, .expr_if_missing_else, self.pos); } self.advance(); const else_idx = self.parseExpr(); diff --git a/src/snapshots/expr_if_missing_else.txt b/src/snapshots/expr_if_missing_else.txt new file mode 100644 index 0000000000..9815ce45cf --- /dev/null +++ b/src/snapshots/expr_if_missing_else.txt @@ -0,0 +1,24 @@ +~~~META +description= +~~~SOURCE +module [] + +foo = if tru then 0 +~~~PROBLEMS +NIL +~~~TOKENS +KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,LowerIdent,Int,EndOfFile +~~~PARSE +(file + (header) + (decl + (ident 'foo') + (malformed_expr 'expr_if_missing_else')) + (int '0')) +~~~FORMATTED +module [] + +foo = + +0 +~~~END \ No newline at end of file From b5bbf77d52389b61127f90c8532119e4a1a5015d Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 13:09:05 +1100 Subject: [PATCH 28/40] malformed for expr_no_space_dot_int --- src/check/parse/Parser.zig | 10 ++-------- src/snapshots/expr_no_space_dot_int.txt | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 8 deletions(-) create mode 100644 src/snapshots/expr_no_space_dot_int.txt diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 49adef96bf..6eeebf4316 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ 
-1005,14 +1005,8 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { var expression = self.parseExprSuffix(start, e); while (self.peek() == .NoSpaceDotInt or self.peek() == .NoSpaceDotLowerIdent) { const tok = self.peek(); - if (tok == .NoSpaceDotInt) { // NoSpaceDotInt - // std.debug.panic("TODO: Handle NoSpaceDotInt case", .{}); - const reason: IR.Diagnostic.Tag = .expr_no_space_dot_int; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); + if (tok == .NoSpaceDotInt) { + return self.store.addMalformed(IR.NodeStore.ExprIdx, .expr_no_space_dot_int, self.pos); } else { // NoSpaceDotLowerIdent const s = self.pos; const ident = self.store.addExpr(.{ .ident = .{ diff --git a/src/snapshots/expr_no_space_dot_int.txt b/src/snapshots/expr_no_space_dot_int.txt new file mode 100644 index 0000000000..cef0b41ccf --- /dev/null +++ b/src/snapshots/expr_no_space_dot_int.txt @@ -0,0 +1,24 @@ +~~~META +description= +~~~SOURCE +module [] + +foo = asd.0 +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.unexpected_token +~~~TOKENS +KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDotInt,EndOfFile +~~~PARSE +(file + (header) + (decl + (ident 'foo') + (malformed_expr 'expr_no_space_dot_int')) + (malformed_expr 'unexpected_token')) +~~~FORMATTED +module [] + +foo = + + +~~~END \ No newline at end of file From bb455e2cba0fa6ed49783697ff7b546f155cfa78 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 13:15:24 +1100 Subject: [PATCH 29/40] malformed for ty_anno_unexpected_token --- src/check/parse/Parser.zig | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 6eeebf4316..9746c0f7d3 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -1146,14 +1146,7 @@ pub fn parseStringExpr(self: *Parser) IR.NodeStore.ExprIdx { }, else => { // Something is broken in the tokenizer if we get here! - // std.debug.print("Unexpected token in string: {s}\n", .{@tagName(self.peek())}); - // unreachable; - const reason: IR.Diagnostic.Tag = .string_unexpected_token; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addExpr(.{ .malformed = .{ .reason = reason } }); + return self.store.addMalformed(IR.NodeStore.ExprIdx, .string_unexpected_token, self.pos); }, } } @@ -1211,7 +1204,7 @@ const TyFnArgs = enum { looking_for_args, }; -/// todo +/// Parse a type annotation, e.g. 
`Foo(a) : (a,Str,I64)` pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.TypeAnnoIdx { const start = self.pos; var anno: ?IR.NodeStore.TypeAnnoIdx = null; @@ -1330,13 +1323,7 @@ pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.Typ self.advance(); // Advance past Underscore }, else => { - // std.debug.panic("Could not parse type annotation, got {s}@{d}", .{ @tagName(self.peek()), self.pos }); - const reason: IR.Diagnostic.Tag = .ty_anno_unexpected_token; - self.pushDiagnostic(reason, .{ - .start = self.pos, - .end = self.pos, - }); - return self.store.addTypeAnno(.{ .malformed = .{ .reason = reason } }); + return self.store.addMalformed(IR.NodeStore.TypeAnnoIdx, .ty_anno_unexpected_token, self.pos); }, } From b43d9f0a50d3f5c8ff962f7c2bbb1a7620aa1079 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 13:23:02 +1100 Subject: [PATCH 30/40] remove file accidentally added --- example.json | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 example.json diff --git a/example.json b/example.json deleted file mode 100644 index fc2e9fa773..0000000000 --- a/example.json +++ /dev/null @@ -1,4 +0,0 @@ -{ - "description": "Basic example to develop the snapshot methodology", - "source": "module [foo, bar]\n\nfoo = \"one\"\n\nbar = \"two\"" -} From f66261f3b972a4b416ba82b868e69f6d112b02a6 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 13:23:30 +1100 Subject: [PATCH 31/40] remove old debug assert --- src/check/parse/Parser.zig | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index 9746c0f7d3..c7afc50871 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -153,9 +153,6 @@ pub fn parseFile(self: *Parser) void { } } - // TODO: fix me, blows up on empty input - // std.debug.assert(self.store.scratch_statements.items.len > 0); - _ = self.store.addFile(.{ .header = header, .statements = self.store.statementSpanFrom(scratch_top), From 18593b5e2f6751abe7ea5fa9e6bc0590bc045611 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 13:40:26 +1100 Subject: [PATCH 32/40] add more SExpr's for Pattern --- src/check/parse.zig | 5 ----- src/check/parse/IR.zig | 36 ++++++++++++++++++++++++++++++- src/coordinate/Filesystem.zig | 33 ++++++++++++----------------- src/snapshots/hello_world.txt | 40 +++++++++++++++++++++++++++++++++++ 4 files changed, 88 insertions(+), 26 deletions(-) create mode 100644 src/snapshots/hello_world.txt diff --git a/src/check/parse.zig b/src/check/parse.zig index 98dc11cbf1..3e3aab6fff 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -22,11 +22,6 @@ pub fn parse(env: *base.ModuleEnv, source: []const u8) IR { tokenizer.tokenize(); const result = tokenizer.finishAndDeinit(); - // TODO I think we should remove this... 
it's always printing to stderr - // if (result.messages.len > 0) { - // tokenizeReport(env.gpa, source, result.messages); - // } - for (result.messages) |msg| { _ = env.problems.append(env.gpa, .{ .tokenize = msg }); } diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index 0116d8747c..a5e1886bb5 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -2179,12 +2179,46 @@ pub const NodeStore = struct { return node; }, + .tag => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .number => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .string => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .record => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .list => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .list_rest => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .tuple => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, + .underscore => { + return sexpr.Expr.init(env.gpa, "underscore"); + }, + .alternatives => { + std.debug.print("TODO implement toSExpr for Pattern {} not yet implemented", .{self}); + @panic("unimplemented"); + }, .malformed => |a| { var node = sexpr.Expr.init(env.gpa, "malformed_pattern"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, - else => @panic("formatting for this pattern not yet implemented"), } } }; diff --git a/src/coordinate/Filesystem.zig b/src/coordinate/Filesystem.zig index fc6b3c80eb..c4cfcb4967 100644 --- a/src/coordinate/Filesystem.zig +++ b/src/coordinate/Filesystem.zig @@ -38,7 +38,7 @@ pub const ReadError = std.fs.File.OpenError || std.posix.ReadError || Allocator. pub const OpenError = std.fs.File.OpenError || std.fs.Dir.AccessError; /// All errors that can occur when canonicalizing a filepath. -pub const CanonicalizeError = error{ FileNotFound, Unknown, OutOfMemory }; // || std.posix.RealPathError; +pub const CanonicalizeError = error{ FileNotFound, Unknown, OutOfMemory } || std.posix.RealPathError; /// An abstracted directory handle. pub const Dir = struct { @@ -76,16 +76,12 @@ pub const Dir = struct { /// Canonicalize the given filepath relative to this dir's path. pub fn canonicalize(dir: *Dir, filename: []const u8, allocator: Allocator) CanonicalizeError![]const u8 { - // return dir.dir.realpathAlloc(allocator, filename) catch |err| { - // switch (err) { - // error.OutOfMemory => exitOnOom(error.OutOfMemory), - // else => return err, - // } - // }; - _ = dir; - _ = filename; - _ = allocator; - return "TODO"; + return dir.dir.realpathAlloc(allocator, filename) catch |err| { + switch (err) { + error.OutOfMemory => exitOnOom(error.OutOfMemory), + else => return err, + } + }; } /// Close this directory. 
@@ -167,13 +163,10 @@ fn baseNameDefault(absolute_path: []const u8) ?[]const u8 { } fn canonicalizeDefault(root_relative_path: []const u8, allocator: Allocator) CanonicalizeError![]const u8 { - // return std.fs.realpathAlloc(allocator, root_relative_path) catch |err| { - // return switch (err) { - // error.FileNotFound => error.FileNotFound, - // else => error.Unknown, - // }; - // }; - _ = root_relative_path; - _ = allocator; - return "TODO"; + return std.fs.realpathAlloc(allocator, root_relative_path) catch |err| { + return switch (err) { + error.FileNotFound => error.FileNotFound, + else => error.Unknown, + }; + }; } diff --git a/src/snapshots/hello_world.txt b/src/snapshots/hello_world.txt new file mode 100644 index 0000000000..649ca6d1d1 --- /dev/null +++ b/src/snapshots/hello_world.txt @@ -0,0 +1,40 @@ +~~~META +description=Hello world +~~~SOURCE +app [main!] { pf: platform "../basic-cli/platform.roc" } + +import pf.Stdout + +main! = |_| Stdout.line!("Hello, world!") +~~~PROBLEMS +check.parse.IR.Diagnostic.Tag.expected_package_or_platform_name +check.parse.IR.Diagnostic.Tag.unexpected_token +check.parse.IR.Diagnostic.Tag.unexpected_token +~~~TOKENS +KwApp,OpenSquare,LowerIdent,CloseSquare,OpenCurly,LowerIdent,OpColon,KwPlatform,StringStart,StringPart,StringEnd,CloseCurly,Newline,KwImport,LowerIdent,NoSpaceDotUpperIdent,Newline,LowerIdent,OpAssign,OpBar,Underscore,OpBar,UpperIdent,NoSpaceDotLowerIdent,NoSpaceOpenRound,StringStart,StringPart,StringEnd,CloseRound,EndOfFile +~~~PARSE +(file + (malformed_header 'expected_package_or_platform_name') + (malformed_expr 'unexpected_token') + (string '../basic-cli/platform.roc') + (malformed_expr 'unexpected_token') + (import + 'pf' + '.Stdout' + '') + (decl + (ident 'main!') + (lambda + (args (underscore)) + (apply + (ident 'Stdout' '.line!') + (string 'Hello, world!'))))) +~~~FORMATTED + +"../basic-cli/platform.roc" + + +import pf.Stdout + +main! = |_| Stdout.line!("Hello, world!") +~~~END \ No newline at end of file From f09af5381ea130e996ffdca10de6b3f8bd7d3e20 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 14:01:15 +1100 Subject: [PATCH 33/40] fix tests, remove SExpr unit test in favour of snapshots --- src/check/parse.zig | 52 ------------------------- src/check/parse/IR.zig | 23 ++++++++++- src/check/parse/Parser.zig | 3 +- src/snapshots/001.txt | 2 +- src/snapshots/003.txt | 2 +- src/snapshots/add_var_with_spaces.txt | 2 +- src/snapshots/expr_if_missing_else.txt | 2 +- src/snapshots/expr_no_space_dot_int.txt | 2 +- src/snapshots/hello_world.txt | 17 ++------ src/snapshots/if_then_else.txt | 2 +- src/snapshots/some_folder/002.txt | 2 +- src/snapshots/type_annotations.txt | 2 +- src/snapshots/type_declarations.txt | 2 +- 13 files changed, 35 insertions(+), 78 deletions(-) diff --git a/src/check/parse.zig b/src/check/parse.zig index 3e3aab6fff..ea9f10ebc2 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -110,55 +110,3 @@ fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []cons } } } - -// TODO move this somewhere better, for now it's here to keep it simple. 
-fn testSExprHelper(source: []const u8, expected: []const u8) !void { - var env = base.ModuleEnv.init(testing.allocator); - defer env.deinit(); - - // parse our source - var parse_ast = parse(&env, source); - defer parse_ast.deinit(); - std.testing.expectEqualSlices(IR.Diagnostic, &[_]IR.Diagnostic{}, parse_ast.errors) catch { - std.debug.print("Tokens:\n{any}", .{parse_ast.tokens.tokens.items(.tag)}); - std.debug.panic("Test failed with parse errors", .{}); - }; - - // shouldn't be required in future - parse_ast.store.emptyScratch(); - - // buffer to write our SExpr to - var buf = std.ArrayList(u8).init(testing.allocator); - defer buf.deinit(); - - // convert the AST to our SExpr - try parse_ast.toSExprStr(&env, buf.writer().any()); - - // TODO in future we should just write the SExpr to a file and snapshot it - // for now we are comparing strings to keep it simple - try testing.expectEqualStrings(expected, buf.items[0..]); -} - -test "example s-expr" { - const source = - \\module [foo, bar] - \\ - \\foo = "hey" - \\bar = "yo" - ; - - const expected = - \\(file - \\ (header - \\ (exposed_item (lower_ident 'foo')) - \\ (exposed_item (lower_ident 'bar'))) - \\ (decl - \\ (ident 'foo') - \\ (string 'hey')) - \\ (decl - \\ (ident 'bar') - \\ (string 'yo'))) - ; - - try testSExprHelper(source, expected); -} diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index a5e1886bb5..b090358c0b 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -1762,8 +1762,13 @@ pub const NodeStore = struct { pub fn toSExpr(self: @This(), env: *base.ModuleEnv, ir: *IR) sexpr.Expr { switch (self) { + .app => { + var node = sexpr.Expr.init(env.gpa, "app"); + node.appendStringChild(env.gpa, "TODO implement toSExpr for app module header"); + return node; + }, .module => |module| { - var header_node = sexpr.Expr.init(env.gpa, "header"); + var header_node = sexpr.Expr.init(env.gpa, "module"); for (ir.store.exposedItemSlice(module.exposes)) |exposed| { const item = ir.store.getExposedItem(exposed); @@ -1773,12 +1778,26 @@ pub const NodeStore = struct { return header_node; }, + .package => { + var node = sexpr.Expr.init(env.gpa, "package"); + node.appendStringChild(env.gpa, "TODO implement toSExpr for package module header"); + return node; + }, + .platform => { + var node = sexpr.Expr.init(env.gpa, "platform"); + node.appendStringChild(env.gpa, "TODO implement toSExpr for platform module header"); + return node; + }, + .hosted => { + var node = sexpr.Expr.init(env.gpa, "hosted"); + node.appendStringChild(env.gpa, "TODO implement toSExpr for hosted module header"); + return node; + }, .malformed => |a| { var node = sexpr.Expr.init(env.gpa, "malformed_header"); node.appendStringChild(env.gpa, @tagName(a.reason)); return node; }, - else => @panic("not implemented"), } } }; diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index c7afc50871..d12325b81d 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -266,11 +266,12 @@ pub fn parseAppHeader(self: *Parser) IR.NodeStore.HeaderIdx { self.expect(.OpenCurly) catch { return self.pushMalformed(IR.NodeStore.HeaderIdx, .expected_package_platform_open_curly); }; - self.advance(); + while (self.peek() != .CloseCurly and self.peek() != .EndOfFile) { const entry_start = self.pos; if (self.peek() != .LowerIdent) { self.store.clearScratchRecordFieldsFrom(fields_scratch_top); + std.debug.print("GOT {}", .{self.peek()}); return self.pushMalformed(IR.NodeStore.HeaderIdx, .expected_package_or_platform_name); } const name_tok 
= self.pos; diff --git a/src/snapshots/001.txt b/src/snapshots/001.txt index 9fc5b99050..55ceff8f18 100644 --- a/src/snapshots/001.txt +++ b/src/snapshots/001.txt @@ -15,7 +15,7 @@ NIL KwModule,OpenSquare,Newline,LowerIdent,Comma,Newline,CloseSquare,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,StringEnd,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (lower_ident 'foo'))) (decl (ident 'foo') diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index e7c45c2aaa..36adb919c5 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -18,7 +18,7 @@ NIL KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,KwImport,LowerIdent,NoSpaceDotUpperIdent,Newline,LowerIdent,OpAssign,UpperIdent,NoSpaceDotLowerIdent,Newline,KwCrash,StringStart,StringPart,StringEnd,Newline,KwExpect,Int,OpEquals,Int,Newline,KwReturn,Int,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (lower_ident 'decoder'))) (import 'json' diff --git a/src/snapshots/add_var_with_spaces.txt b/src/snapshots/add_var_with_spaces.txt index 3226f1557d..3d2ac0a91c 100644 --- a/src/snapshots/add_var_with_spaces.txt +++ b/src/snapshots/add_var_with_spaces.txt @@ -10,7 +10,7 @@ NIL KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,OpPlus,Int,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (lower_ident 'add2'))) (decl (ident 'add2') diff --git a/src/snapshots/expr_if_missing_else.txt b/src/snapshots/expr_if_missing_else.txt index 9815ce45cf..a70eb9475c 100644 --- a/src/snapshots/expr_if_missing_else.txt +++ b/src/snapshots/expr_if_missing_else.txt @@ -10,7 +10,7 @@ NIL KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,LowerIdent,Int,EndOfFile ~~~PARSE (file - (header) + (module) (decl (ident 'foo') (malformed_expr 'expr_if_missing_else')) diff --git a/src/snapshots/expr_no_space_dot_int.txt b/src/snapshots/expr_no_space_dot_int.txt index cef0b41ccf..ba64cd283d 100644 --- a/src/snapshots/expr_no_space_dot_int.txt +++ b/src/snapshots/expr_no_space_dot_int.txt @@ -10,7 +10,7 @@ check.parse.IR.Diagnostic.Tag.unexpected_token KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDotInt,EndOfFile ~~~PARSE (file - (header) + (module) (decl (ident 'foo') (malformed_expr 'expr_no_space_dot_int')) diff --git a/src/snapshots/hello_world.txt b/src/snapshots/hello_world.txt index 649ca6d1d1..5bf4c881d6 100644 --- a/src/snapshots/hello_world.txt +++ b/src/snapshots/hello_world.txt @@ -7,17 +7,12 @@ import pf.Stdout main! 
= |_| Stdout.line!("Hello, world!") ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.expected_package_or_platform_name -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token +NIL ~~~TOKENS KwApp,OpenSquare,LowerIdent,CloseSquare,OpenCurly,LowerIdent,OpColon,KwPlatform,StringStart,StringPart,StringEnd,CloseCurly,Newline,KwImport,LowerIdent,NoSpaceDotUpperIdent,Newline,LowerIdent,OpAssign,OpBar,Underscore,OpBar,UpperIdent,NoSpaceDotLowerIdent,NoSpaceOpenRound,StringStart,StringPart,StringEnd,CloseRound,EndOfFile ~~~PARSE (file - (malformed_header 'expected_package_or_platform_name') - (malformed_expr 'unexpected_token') - (string '../basic-cli/platform.roc') - (malformed_expr 'unexpected_token') + (app 'TODO implement toSExpr for app module header') (import 'pf' '.Stdout' @@ -30,11 +25,5 @@ KwApp,OpenSquare,LowerIdent,CloseSquare,OpenCurly,LowerIdent,OpColon,KwPlatform, (ident 'Stdout' '.line!') (string 'Hello, world!'))))) ~~~FORMATTED - -"../basic-cli/platform.roc" - - -import pf.Stdout - -main! = |_| Stdout.line!("Hello, world!") +NO CHANGE ~~~END \ No newline at end of file diff --git a/src/snapshots/if_then_else.txt b/src/snapshots/if_then_else.txt index fd00f6cd51..2a74e66e07 100644 --- a/src/snapshots/if_then_else.txt +++ b/src/snapshots/if_then_else.txt @@ -14,7 +14,7 @@ NIL KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,UpperIdent,Newline,KwElse,OpenCurly,Newline,UpperIdent,Newline,CloseCurly,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (lower_ident 'foo'))) (decl (ident 'foo') diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 084d455adf..44dc634c66 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -12,7 +12,7 @@ NIL KwModule,OpenSquare,LowerIdent,Comma,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,StringStart,StringPart,StringEnd,Newline,LowerIdent,OpAssign,StringStart,StringPart,StringEnd,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (lower_ident 'foo')) (exposed_item (lower_ident 'bar'))) (decl diff --git a/src/snapshots/type_annotations.txt b/src/snapshots/type_annotations.txt index 02179b0525..a46512bc3e 100644 --- a/src/snapshots/type_annotations.txt +++ b/src/snapshots/type_annotations.txt @@ -14,7 +14,7 @@ NIL KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpColon,UpperIdent,Newline,LowerIdent,OpColon,UpperIdent,NoSpaceOpenRound,LowerIdent,Comma,LowerIdent,Comma,Underscore,CloseRound,Newline,LowerIdent,OpColon,OpenRound,LowerIdent,Comma,LowerIdent,Comma,LowerIdent,CloseRound,Newline,LowerIdent,OpColon,OpenRound,UpperIdent,Comma,UpperIdent,OpArrow,UpperIdent,CloseRound,Newline,LowerIdent,OpColon,UpperIdent,NoSpaceOpenRound,UpperIdent,CloseRound,OpArrow,UpperIdent,NoSpaceOpenRound,OpenCurly,CloseCurly,Comma,Underscore,CloseRound,EndOfFile ~~~PARSE (file - (header) + (module) (type_anno 'foo' (tag 'U64')) diff --git a/src/snapshots/type_declarations.txt b/src/snapshots/type_declarations.txt index 43fea454fd..fbc843cf57 100644 --- a/src/snapshots/type_declarations.txt +++ b/src/snapshots/type_declarations.txt @@ -18,7 +18,7 @@ NIL 
KwModule,OpenSquare,UpperIdent,Comma,UpperIdent,Comma,UpperIdent,Comma,UpperIdent,Comma,UpperIdent,Comma,LowerIdent,Comma,LowerIdent,CloseSquare,Newline,UpperIdent,LowerIdent,LowerIdent,OpColon,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,Comma,OpenRound,LowerIdent,OpArrow,LowerIdent,CloseRound,OpArrow,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,Newline,UpperIdent,OpColon,OpenRound,UpperIdent,Comma,UpperIdent,CloseRound,Newline,UpperIdent,LowerIdent,OpColon,OpenCurly,LowerIdent,OpColon,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,Comma,LowerIdent,OpColon,UpperIdent,CloseCurly,Newline,UpperIdent,LowerIdent,OpColon,OpenSquare,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,Comma,UpperIdent,CloseSquare,Newline,UpperIdent,LowerIdent,OpColon,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,Comma,LowerIdent,OpArrow,UpperIdent,NoSpaceOpenRound,LowerIdent,CloseRound,EndOfFile ~~~PARSE (file - (header + (module (exposed_item (upper_ident 'Map')) (exposed_item (upper_ident 'Foo')) (exposed_item (upper_ident 'Some')) From 365a9d52f7131a58928438016562dfa3d0895ca8 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 14:24:39 +1100 Subject: [PATCH 34/40] move tokenize diagnostic to method --- src/check/parse.zig | 66 -------------------- src/check/parse/tokenize.zig | 101 +++++++++++++++++++++++++++++++ src/problem.zig | 9 ++- src/snapshot.zig | 3 +- src/snapshots/fuzz_crash_003.txt | 5 +- src/snapshots/fuzz_crash_006.txt | Bin 390 -> 386 bytes src/snapshots/fuzz_crash_008.txt | 5 +- src/snapshots/fuzz_crash_009.txt | 12 +++- src/snapshots/fuzz_crash_010.txt | 18 +++++- 9 files changed, 140 insertions(+), 79 deletions(-) diff --git a/src/check/parse.zig b/src/check/parse.zig index ea9f10ebc2..a8b1a0ff59 100644 --- a/src/check/parse.zig +++ b/src/check/parse.zig @@ -44,69 +44,3 @@ pub fn parse(env: *base.ModuleEnv, source: []const u8) IR { .errors = errors, }; } - -fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { - const pos_usize = @as(usize, @intCast(pos)); - - if (newlines.items.len == 0) { - return 0; - } - - var lineno: u32 = 0; - - while (lineno + 1 < newlines.items.len) { - if (newlines.items[lineno + 1] > pos_usize) { - return lineno; - } - lineno += 1; - } - - return lineno; -} - -/// TODO -- I think we should change this to be a method on Diagnostic -/// and then we can have the caller use this to format to a writer -/// this would be helpful for e.g. 
the snapshot tool which writes to a file instead of stderr -fn tokenizeReport(allocator: std.mem.Allocator, source: []const u8, msgs: []const tokenize.Diagnostic) void { - std.debug.print("Found the {d} following issues while parsing:\n", .{msgs.len}); - var newlines = std.ArrayList(usize).init(allocator); - defer newlines.deinit(); - newlines.append(0) catch |err| exitOnOom(err); - var pos: usize = 0; - for (source) |c| { - if (c == '\n') { - newlines.append(pos) catch |err| exitOnOom(err); - } - pos += 1; - } - for (msgs) |message| { - switch (message.tag) { - .MismatchedBrace => { - const start_line_num = lineNum(newlines, message.begin); - const start_col = message.begin - newlines.items[start_line_num]; - const end_line_num = lineNum(newlines, message.end); - const end_col = message.end - newlines.items[end_line_num]; - - const end_index = if (end_line_num + 1 < newlines.items.len) - end_line_num + 1 - else - end_line_num; - - const src = source[newlines.items[start_line_num]..newlines.items[end_index]]; - var spaces = std.ArrayList(u8).init(allocator); - defer spaces.deinit(); - for (0..start_col) |_| { - spaces.append(' ') catch |err| exitOnOom(err); - } - - std.debug.print( - "TOKENIZE ERROR: ({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", - .{ start_line_num, start_col, end_line_num, end_col, src, spaces.items }, - ); - }, - else => { - std.debug.print("TOKENIZE ERROR: {any}\n", .{message}); - }, - } - } -} diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index a0e1696cca..b7122b4b42 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -1,4 +1,5 @@ const std = @import("std"); +const Allocator = std.mem.Allocator; const collections = @import("../../collections.zig"); const exitOnOom = @import("../../collections/utils.zig").exitOnOom; const base = @import("../../base.zig"); @@ -328,8 +329,108 @@ pub const Diagnostic = struct { OverClosedBrace, MismatchedBrace, }; + + pub fn toStr(self: Diagnostic, gpa: Allocator, source: []const u8, writer: anytype) !void { + var newlines = std.ArrayList(usize).init(gpa); + defer newlines.deinit(); + try newlines.append(0); + + var pos: usize = 0; + for (source) |c| { + if (c == '\n') { + try newlines.append(pos); + } + pos += 1; + } + + switch (self.tag) { + .MismatchedBrace => { + const start_line_num = lineNum(newlines, self.begin); + const start_col = self.begin - newlines.items[start_line_num]; + const end_line_num = lineNum(newlines, self.end); + const end_col = self.end - newlines.items[end_line_num]; + + const end_index = if (end_line_num + 1 < newlines.items.len) + end_line_num + 1 + else + end_line_num; + + const src = source[newlines.items[start_line_num]..newlines.items[end_index]]; + var spaces = std.ArrayList(u8).init(gpa); + defer spaces.deinit(); + for (0..start_col) |_| { + try spaces.append(' '); + } + + const error_message = try std.fmt.allocPrint( + gpa, + "TOKENIZE: ({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", + .{ start_line_num, start_col, end_line_num, end_col, src, spaces.items }, + ); + defer gpa.free(error_message); + + try writer.writeAll(error_message); + }, + else => { + const start_line_num = lineNum(newlines, self.begin); + const start_col = self.begin - newlines.items[start_line_num]; + const end_line_num = lineNum(newlines, self.end); + const end_col = self.end - newlines.items[end_line_num]; + + const end_index = if (end_line_num + 1 < newlines.items.len) + newlines.items[end_line_num + 1] + else + source.len; 
+ + const line_start = newlines.items[start_line_num]; + const src_line = source[line_start..end_index]; + + var spaces = std.ArrayList(u8).init(gpa); + defer spaces.deinit(); + for (0..start_col) |_| { + try spaces.append(' '); + } + + var carets = std.ArrayList(u8).init(gpa); + defer carets.deinit(); + for (0..end_col - start_col) |_| { + try carets.append('^'); + } + if (carets.items.len == 0) { + try carets.append('^'); + } + + const error_message = try std.fmt.allocPrint( + gpa, + "TOKENIZE: ({d}:{d}-{d}:{d}) {s}:\n{s}\n{s}{s}\n", + .{ start_line_num + 1, start_col + 1, end_line_num + 1, end_col + 1, @tagName(self.tag), src_line, spaces.items, carets.items }, + ); + defer gpa.free(error_message); + try writer.writeAll(error_message); + }, + } + } }; +fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { + const pos_usize = @as(usize, @intCast(pos)); + + if (newlines.items.len == 0) { + return 0; + } + + var lineno: u32 = 0; + + while (lineno + 1 < newlines.items.len) { + if (newlines.items[lineno + 1] > pos_usize) { + return lineno; + } + lineno += 1; + } + + return lineno; +} + /// The cursor is our current position in the input text, and it collects messages. /// Note that instead of allocating its own message list, the caller must pass in a pre-allocated /// slice of Message. The field `message_count` tracks how many messages have been written. diff --git a/src/problem.zig b/src/problem.zig index 75b1bec09f..4207c3d464 100644 --- a/src/problem.zig +++ b/src/problem.zig @@ -79,17 +79,16 @@ pub const Problem = union(enum) { pub const Idx = List.Idx; /// Format a `Problem` for display. - pub fn format(self: @This(), comptime fmt: []const u8, options: std.fmt.FormatOptions, writer: anytype) !void { - _ = fmt; - _ = options; + pub fn toStr(self: @This(), gpa: Allocator, source: []const u8, writer: anytype) !void { // use a stack allocation for printing our tag errors var buf: [1000]u8 = undefined; switch (self) { .tokenize => |a| { - const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); - try writer.writeAll(str); + try a.toStr(gpa, source, writer); + // const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); + // try writer.writeAll(str); }, .parser => |a| { const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); diff --git a/src/snapshot.zig b/src/snapshot.zig index ff3e37e8db..470a651eea 100644 --- a/src/snapshot.zig +++ b/src/snapshot.zig @@ -337,7 +337,8 @@ fn processSnapshotFile(gpa: Allocator, snapshot_path: []const u8, maybe_fuzz_cor if (module_env.problems.len() > 0) { var iter = module_env.problems.iterIndices(); while (iter.next()) |problem_idx| { - try module_env.problems.get(problem_idx).format("", .{}, file); + const problem = module_env.problems.get(problem_idx); + try problem.toStr(gpa, content.source, file); try file.writer().writeAll("\n"); } } else { diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index 4aa3d0e6a1..40bb2e52d0 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -3,7 +3,10 @@ description=fuzz crash ~~~SOURCE = "te ~~~PROBLEMS -check.parse.tokenize.Diagnostic.Tag.UnclosedString +TOKENIZE: (1:5-1:7) UnclosedString: + = "te + ^^ + check.parse.IR.Diagnostic.Tag.missing_header ~~~TOKENS OpAssign,StringStart,StringPart,EndOfFile diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 0eecf855f5e8b51b4cb6617bf5cb98e81f39068c..2a7b9af362dd1af2c49f96100e8971c6b2c317b2 100644 GIT binary patch delta 103 zcmZo;ZepHb8yn*9?dsXBDna=rnB}Mr;R$L4o{x$qHWn6Jw 
WTquf7fr?FWDOLc2IIfBFUH|~l8y!3V delta 107 zcmZo-ZeyNco0y!Dnw+gykXTfls#lVqotl?fm8$2GnV6oJUtE%ztQV4)uIE^soSEsI RpI1_ppTk9}hKaLY003SHDF6Tf diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index 9fbd95a8f3..93a19f9db0 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -3,7 +3,10 @@ description=fuzz crash ~~~SOURCE ||1 ~~~PROBLEMS -check.parse.tokenize.Diagnostic.Tag.AsciiControl +TOKENIZE: (1:2-1:2) AsciiControl: +||1 + ^ + check.parse.IR.Diagnostic.Tag.missing_header check.parse.IR.Diagnostic.Tag.unexpected_token ~~~TOKENS diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt index 7f2e84c49f..91d6fcf49d 100644 --- a/src/snapshots/fuzz_crash_009.txt +++ b/src/snapshots/fuzz_crash_009.txt @@ -8,8 +8,16 @@ foo = "onmo % ~~~PROBLEMS -check.parse.tokenize.Diagnostic.Tag.MismatchedBrace -check.parse.tokenize.Diagnostic.Tag.UnclosedString +TOKENIZE: (1:6-1:6) Expected the correct closing brace here: + + ] + ^ + +TOKENIZE: (6:7-6:13) UnclosedString: + + "onmo % + ^^^^^^ + check.parse.IR.Diagnostic.Tag.missing_header ~~~TOKENS LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt index 923fc81c1c..6100ed12e2 100644 --- a/src/snapshots/fuzz_crash_010.txt +++ b/src/snapshots/fuzz_crash_010.txt @@ -8,9 +8,21 @@ foo = "on (string 'onmo %'))) ~~~PROBLEMS -check.parse.tokenize.Diagnostic.Tag.AsciiControl -check.parse.tokenize.Diagnostic.Tag.MismatchedBrace -check.parse.tokenize.Diagnostic.Tag.UnclosedString +TOKENIZE: (2:4-2:4) AsciiControl: + +  ] + ^ + +TOKENIZE: (1:6-1:6) Expected the correct closing brace here: + +  ] + ^ + +TOKENIZE: (6:7-6:36) UnclosedString: + + "on (string 'onmo %'))) + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + check.parse.IR.Diagnostic.Tag.missing_header ~~~TOKENS UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile From 39e159617acefca7b3fde892158e1f572842ab28 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 14:28:25 +1100 Subject: [PATCH 35/40] improve formatting for Problems in snapshots --- src/check/parse/tokenize.zig | 2 +- src/problem.zig | 20 ++++++------- src/snapshots/expr_no_space_dot_int.txt | 2 +- src/snapshots/fuzz_crash_001.txt | 4 +-- src/snapshots/fuzz_crash_002.txt | 38 ++++++++++++------------ src/snapshots/fuzz_crash_003.txt | 3 +- src/snapshots/fuzz_crash_004.txt | 2 +- src/snapshots/fuzz_crash_005.txt | 2 +- src/snapshots/fuzz_crash_006.txt | Bin 386 -> 362 bytes src/snapshots/fuzz_crash_007.txt | 6 ++-- src/snapshots/fuzz_crash_008.txt | 5 ++-- src/snapshots/fuzz_crash_009.txt | 3 +- src/snapshots/fuzz_crash_010.txt | 4 +-- 13 files changed, 42 insertions(+), 49 deletions(-) diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index b7122b4b42..79318124c9 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -402,7 +402,7 @@ pub const Diagnostic = struct { const error_message = try std.fmt.allocPrint( gpa, - "TOKENIZE: ({d}:{d}-{d}:{d}) {s}:\n{s}\n{s}{s}\n", + "TOKENIZE: ({d}:{d}-{d}:{d}) {s}:\n{s}\n{s}{s}", .{ start_line_num + 1, start_col + 1, end_line_num + 1, end_col + 1, @tagName(self.tag), src_line, spaces.items, carets.items }, ); defer gpa.free(error_message); diff --git a/src/problem.zig b/src/problem.zig index 4207c3d464..c3635bff2a 100644 --- a/src/problem.zig +++ 
b/src/problem.zig @@ -85,20 +85,18 @@ pub const Problem = union(enum) { var buf: [1000]u8 = undefined; switch (self) { - .tokenize => |a| { - try a.toStr(gpa, source, writer); - // const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); - // try writer.writeAll(str); - }, + .tokenize => |a| try a.toStr(gpa, source, writer), .parser => |a| { - const str = try std.fmt.bufPrint(&buf, "{}", .{a.tag}); - try writer.writeAll(str); + const err_msg = try std.fmt.bufPrint(&buf, "PARSER: {s}", .{@tagName(a.tag)}); + try writer.writeAll(err_msg); }, - .canonicalize => { - try writer.writeAll("CAN ERROR"); + .canonicalize => |err| { + const err_msg = try std.fmt.bufPrint(&buf, "CAN: {?}", .{err}); + try writer.writeAll(err_msg); }, - .compiler => { - try writer.writeAll("COMPILER ERROR"); + .compiler => |err| { + const err_msg = try std.fmt.bufPrint(&buf, "COMPILER: {?}", .{err}); + try writer.writeAll(err_msg); }, } } diff --git a/src/snapshots/expr_no_space_dot_int.txt b/src/snapshots/expr_no_space_dot_int.txt index ba64cd283d..3edf9fe796 100644 --- a/src/snapshots/expr_no_space_dot_int.txt +++ b/src/snapshots/expr_no_space_dot_int.txt @@ -5,7 +5,7 @@ module [] foo = asd.0 ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.unexpected_token +PARSER: unexpected_token ~~~TOKENS KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDotInt,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 3833330e7f..c638a39f71 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -3,8 +3,8 @@ description=fuzz crash ~~~SOURCE mo|% ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.missing_header -check.parse.IR.Diagnostic.Tag.unexpected_token +PARSER: missing_header +PARSER: unexpected_token ~~~TOKENS LowerIdent,OpBar,OpPercent,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index b7c0c937ab..ca03ed7fbf 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -3,25 +3,25 @@ description=fuzz crash ~~~SOURCE modu:;::::::::::::::le[% ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.missing_header -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token +PARSER: missing_header +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token +PARSER: unexpected_token ~~~TOKENS 
LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index 40bb2e52d0..896b3921c6 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -6,8 +6,7 @@ description=fuzz crash TOKENIZE: (1:5-1:7) UnclosedString: = "te ^^ - -check.parse.IR.Diagnostic.Tag.missing_header +PARSER: missing_header ~~~TOKENS OpAssign,StringStart,StringPart,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index 069c052a5f..ab89b1a10c 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -3,7 +3,7 @@ description=fuzz crash ~~~SOURCE F ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.missing_header +PARSER: missing_header ~~~TOKENS UpperIdent,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index d23f9ad827..ade10369ac 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -3,7 +3,7 @@ description=fuzz crash ~~~SOURCE modu ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.missing_header +PARSER: missing_header ~~~TOKENS LowerIdent,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 2a7b9af362dd1af2c49f96100e8971c6b2c317b2..6ef98d0bc2d5731beab6d8e8767b5450f6afa1a7 100644 GIT binary patch delta 21 ccmZo-e#JB)b7GbOM}T8cuxpT&!o-~?0aA+x*Z=?k delta 44 zcmaFG)Wkd?b7F>}JXdl?YI3$-L1IyHs-9<%o=aw8dR~5UNoKNMNMgF)#DymTV>A$S diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt index a66e0c0a68..544c560478 100644 --- a/src/snapshots/fuzz_crash_007.txt +++ b/src/snapshots/fuzz_crash_007.txt @@ -3,9 +3,9 @@ description=fuzz crash ~~~SOURCE ff8.8.d ~~~PROBLEMS -check.parse.IR.Diagnostic.Tag.missing_header -check.parse.IR.Diagnostic.Tag.unexpected_token -check.parse.IR.Diagnostic.Tag.unexpected_token +PARSER: missing_header +PARSER: unexpected_token +PARSER: unexpected_token ~~~TOKENS LowerIdent,NoSpaceDotInt,NoSpaceDotLowerIdent,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index 93a19f9db0..0a6362a777 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -6,9 +6,8 @@ description=fuzz crash TOKENIZE: (1:2-1:2) AsciiControl: ||1 ^ - -check.parse.IR.Diagnostic.Tag.missing_header -check.parse.IR.Diagnostic.Tag.unexpected_token +PARSER: missing_header +PARSER: unexpected_token ~~~TOKENS OpBar,OpBar,Int,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt index 91d6fcf49d..91968425dd 100644 --- a/src/snapshots/fuzz_crash_009.txt +++ b/src/snapshots/fuzz_crash_009.txt @@ -17,8 +17,7 @@ TOKENIZE: (6:7-6:13) UnclosedString: "onmo % ^^^^^^ - -check.parse.IR.Diagnostic.Tag.missing_header +PARSER: missing_header ~~~TOKENS LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt index 6100ed12e2..b8de587aad 100644 --- a/src/snapshots/fuzz_crash_010.txt +++ b/src/snapshots/fuzz_crash_010.txt @@ -12,7 +12,6 @@ TOKENIZE: (2:4-2:4) AsciiControl:  ] ^ - TOKENIZE: (1:6-1:6) Expected the correct closing brace here:  ] @@ -22,8 +21,7 @@ TOKENIZE: (6:7-6:36) UnclosedString: 
"on (string 'onmo %'))) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ - -check.parse.IR.Diagnostic.Tag.missing_header +PARSER: missing_header ~~~TOKENS UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE From 2fc3cd9fb1ce0c99f965d1a9a79a8d4e4e8612e8 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 14:42:57 +1100 Subject: [PATCH 36/40] use double quotes for sexpr strings, fix caret position in tokenize problem formatting --- src/base/sexpr.zig | 2 +- src/check/parse/tokenize.zig | 6 +- src/snapshots/001.txt | 6 +- src/snapshots/003.txt | 22 +++---- src/snapshots/add_var_with_spaces.txt | 10 +-- src/snapshots/expr_if_missing_else.txt | 6 +- src/snapshots/expr_no_space_dot_int.txt | 6 +- src/snapshots/fuzz_crash_001.txt | 4 +- src/snapshots/fuzz_crash_002.txt | 38 ++++++------ src/snapshots/fuzz_crash_003.txt | 4 +- src/snapshots/fuzz_crash_004.txt | 2 +- src/snapshots/fuzz_crash_005.txt | 2 +- src/snapshots/fuzz_crash_006.txt | Bin 362 -> 362 bytes src/snapshots/fuzz_crash_007.txt | 6 +- src/snapshots/fuzz_crash_008.txt | 4 +- src/snapshots/fuzz_crash_009.txt | 8 +-- src/snapshots/fuzz_crash_010.txt | 8 +-- .../header_expected_open_bracket.txt | 2 +- src/snapshots/hello_world.txt | 14 ++--- src/snapshots/if_then_else.txt | 10 +-- src/snapshots/some_folder/002.txt | 12 ++-- src/snapshots/type_annotations.txt | 36 +++++------ src/snapshots/type_declarations.txt | 58 +++++++++--------- 23 files changed, 132 insertions(+), 134 deletions(-) diff --git a/src/base/sexpr.zig b/src/base/sexpr.zig index e07a7b8902..0994a7079d 100644 --- a/src/base/sexpr.zig +++ b/src/base/sexpr.zig @@ -129,7 +129,7 @@ pub const Expr = union(enum) { try writer.print(")", .{}); }, - .string => |s| try writer.print("'{s}'", .{s}), + .string => |s| try writer.print("\"{s}\"", .{s}), .signed_int => |i| try writer.print("{d}", .{i}), .unsigned_int => |u| try writer.print("{d}", .{u}), .float => |f| try writer.print("{any}", .{f}), diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index 79318124c9..76122efa6b 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -393,10 +393,8 @@ pub const Diagnostic = struct { var carets = std.ArrayList(u8).init(gpa); defer carets.deinit(); - for (0..end_col - start_col) |_| { - try carets.append('^'); - } - if (carets.items.len == 0) { + const caret_length = if (self.end > self.begin) self.end - self.begin else 1; + for (0..caret_length) |_| { try carets.append('^'); } diff --git a/src/snapshots/001.txt b/src/snapshots/001.txt index 55ceff8f18..6c93770b99 100644 --- a/src/snapshots/001.txt +++ b/src/snapshots/001.txt @@ -16,10 +16,10 @@ KwModule,OpenSquare,Newline,LowerIdent,Comma,Newline,CloseSquare,Newline,LowerId ~~~PARSE (file (module - (exposed_item (lower_ident 'foo'))) + (exposed_item (lower_ident "foo"))) (decl - (ident 'foo') - (string 'one'))) + (ident "foo") + (string "one"))) ~~~FORMATTED module [foo] diff --git a/src/snapshots/003.txt b/src/snapshots/003.txt index 36adb919c5..e997c32c4d 100644 --- a/src/snapshots/003.txt +++ b/src/snapshots/003.txt @@ -19,21 +19,21 @@ KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,KwImport,LowerIdent,NoSpaceDo ~~~PARSE (file (module - (exposed_item (lower_ident 'decoder'))) + (exposed_item (lower_ident "decoder"))) (import - 'json' - '.Json' - '') + "json" + ".Json" + "") (decl - (ident 'decoder') - (ident 'Utf8' '.decode')) - (crash (string 'something')) + (ident "decoder") + (ident "Utf8" ".decode")) + 
(crash (string "something")) (expect (binop - '==' - (int '1') - (int '1'))) - (return (int '2'))) + "==" + (int "1") + (int "1"))) + (return (int "2"))) ~~~FORMATTED NO CHANGE ~~~END \ No newline at end of file diff --git a/src/snapshots/add_var_with_spaces.txt b/src/snapshots/add_var_with_spaces.txt index 3d2ac0a91c..f22c3f9eb9 100644 --- a/src/snapshots/add_var_with_spaces.txt +++ b/src/snapshots/add_var_with_spaces.txt @@ -11,13 +11,13 @@ KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,LowerIden ~~~PARSE (file (module - (exposed_item (lower_ident 'add2'))) + (exposed_item (lower_ident "add2"))) (decl - (ident 'add2') + (ident "add2") (binop - '+' - (ident '' 'x') - (int '2')))) + "+" + (ident "" "x") + (int "2")))) ~~~FORMATTED module [add2] diff --git a/src/snapshots/expr_if_missing_else.txt b/src/snapshots/expr_if_missing_else.txt index a70eb9475c..ae871f0def 100644 --- a/src/snapshots/expr_if_missing_else.txt +++ b/src/snapshots/expr_if_missing_else.txt @@ -12,9 +12,9 @@ KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,Lowe (file (module) (decl - (ident 'foo') - (malformed_expr 'expr_if_missing_else')) - (int '0')) + (ident "foo") + (malformed_expr "expr_if_missing_else")) + (int "0")) ~~~FORMATTED module [] diff --git a/src/snapshots/expr_no_space_dot_int.txt b/src/snapshots/expr_no_space_dot_int.txt index 3edf9fe796..d8bdbb05ac 100644 --- a/src/snapshots/expr_no_space_dot_int.txt +++ b/src/snapshots/expr_no_space_dot_int.txt @@ -12,9 +12,9 @@ KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDo (file (module) (decl - (ident 'foo') - (malformed_expr 'expr_no_space_dot_int')) - (malformed_expr 'unexpected_token')) + (ident "foo") + (malformed_expr "expr_no_space_dot_int")) + (malformed_expr "unexpected_token")) ~~~FORMATTED module [] diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index c638a39f71..36cd07661c 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -9,8 +9,8 @@ PARSER: unexpected_token LowerIdent,OpBar,OpPercent,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (malformed_expr 'unexpected_token')) + (malformed_header "missing_header") + (malformed_expr "unexpected_token")) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_002.txt b/src/snapshots/fuzz_crash_002.txt index ca03ed7fbf..66dca937ef 100644 --- a/src/snapshots/fuzz_crash_002.txt +++ b/src/snapshots/fuzz_crash_002.txt @@ -26,25 +26,25 @@ PARSER: unexpected_token LowerIdent,OpColon,MalformedUnknownToken,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,OpColon,LowerIdent,OpenSquare,OpPercent,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token') - (ident '' 'le') - (malformed_expr 'unexpected_token')) + (malformed_header "missing_header") + (malformed_expr 
"unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token") + (ident "" "le") + (malformed_expr "unexpected_token")) ~~~FORMATTED diff --git a/src/snapshots/fuzz_crash_003.txt b/src/snapshots/fuzz_crash_003.txt index 896b3921c6..1fef23b26f 100644 --- a/src/snapshots/fuzz_crash_003.txt +++ b/src/snapshots/fuzz_crash_003.txt @@ -11,8 +11,8 @@ PARSER: missing_header OpAssign,StringStart,StringPart,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (string 'te')) + (malformed_header "missing_header") + (string "te")) ~~~FORMATTED "te" ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_004.txt b/src/snapshots/fuzz_crash_004.txt index ab89b1a10c..22fabb1c13 100644 --- a/src/snapshots/fuzz_crash_004.txt +++ b/src/snapshots/fuzz_crash_004.txt @@ -7,7 +7,7 @@ PARSER: missing_header ~~~TOKENS UpperIdent,EndOfFile ~~~PARSE -(file (malformed_header 'missing_header')) +(file (malformed_header "missing_header")) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_005.txt b/src/snapshots/fuzz_crash_005.txt index ade10369ac..72e2fcdfcf 100644 --- a/src/snapshots/fuzz_crash_005.txt +++ b/src/snapshots/fuzz_crash_005.txt @@ -7,7 +7,7 @@ PARSER: missing_header ~~~TOKENS LowerIdent,EndOfFile ~~~PARSE -(file (malformed_header 'missing_header')) +(file (malformed_header "missing_header")) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_006.txt b/src/snapshots/fuzz_crash_006.txt index 6ef98d0bc2d5731beab6d8e8767b5450f6afa1a7..4004ba99d0a6e11931c3699df812cae85e35709e 100644 GIT binary patch delta 39 ucmaFG^onT%H=}@3Zf0?DW?p)HMrvY8YLU`pX+|A3B_#!=GNsABjIsa+uM4OE delta 39 ucmaFG^onT%H=}@hZf0?DW?p)HMrvY8YLWV6X+|A3b#(>xGWE&6jIsa;H4DZ7 diff --git a/src/snapshots/fuzz_crash_007.txt b/src/snapshots/fuzz_crash_007.txt index 544c560478..9695a5f700 100644 --- a/src/snapshots/fuzz_crash_007.txt +++ b/src/snapshots/fuzz_crash_007.txt @@ -10,9 +10,9 @@ PARSER: unexpected_token LowerIdent,NoSpaceDotInt,NoSpaceDotLowerIdent,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (malformed_expr 'unexpected_token') - (malformed_expr 'unexpected_token')) + (malformed_header "missing_header") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token")) ~~~FORMATTED diff --git a/src/snapshots/fuzz_crash_008.txt b/src/snapshots/fuzz_crash_008.txt index 0a6362a777..255939190b 100644 --- a/src/snapshots/fuzz_crash_008.txt +++ b/src/snapshots/fuzz_crash_008.txt @@ -12,8 +12,8 @@ PARSER: unexpected_token OpBar,OpBar,Int,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (malformed_expr 'unexpected_token')) + (malformed_header "missing_header") + (malformed_expr "unexpected_token")) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt index 91968425dd..a9d46699ca 100644 --- a/src/snapshots/fuzz_crash_009.txt +++ b/src/snapshots/fuzz_crash_009.txt @@ -22,11 +22,11 @@ PARSER: missing_header 
LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (record (field 'o')) + (malformed_header "missing_header") + (record (field "o")) (decl - (ident 'foo') - (string 'onmo %'))) + (ident "foo") + (string "onmo %"))) ~~~FORMATTED { o } diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt index b8de587aad..38b5b3e4b3 100644 --- a/src/snapshots/fuzz_crash_010.txt +++ b/src/snapshots/fuzz_crash_010.txt @@ -26,11 +26,11 @@ PARSER: missing_header UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile ~~~PARSE (file - (malformed_header 'missing_header') - (record (field 'o')) + (malformed_header "missing_header") + (record (field "o")) (decl - (ident 'foo') - (string 'on (string 'onmo %')))'))) + (ident "foo") + (string "on (string 'onmo %')))"))) ~~~FORMATTED { o } diff --git a/src/snapshots/header_expected_open_bracket.txt b/src/snapshots/header_expected_open_bracket.txt index 9aa7d6b490..ef3584caa7 100644 --- a/src/snapshots/header_expected_open_bracket.txt +++ b/src/snapshots/header_expected_open_bracket.txt @@ -7,7 +7,7 @@ NIL ~~~TOKENS KwModule,EndOfFile ~~~PARSE -(file (malformed_header 'header_expected_open_bracket')) +(file (malformed_header "header_expected_open_bracket")) ~~~FORMATTED ~~~END \ No newline at end of file diff --git a/src/snapshots/hello_world.txt b/src/snapshots/hello_world.txt index 5bf4c881d6..997b02476d 100644 --- a/src/snapshots/hello_world.txt +++ b/src/snapshots/hello_world.txt @@ -12,18 +12,18 @@ NIL KwApp,OpenSquare,LowerIdent,CloseSquare,OpenCurly,LowerIdent,OpColon,KwPlatform,StringStart,StringPart,StringEnd,CloseCurly,Newline,KwImport,LowerIdent,NoSpaceDotUpperIdent,Newline,LowerIdent,OpAssign,OpBar,Underscore,OpBar,UpperIdent,NoSpaceDotLowerIdent,NoSpaceOpenRound,StringStart,StringPart,StringEnd,CloseRound,EndOfFile ~~~PARSE (file - (app 'TODO implement toSExpr for app module header') + (app "TODO implement toSExpr for app module header") (import - 'pf' - '.Stdout' - '') + "pf" + ".Stdout" + "") (decl - (ident 'main!') + (ident "main!") (lambda (args (underscore)) (apply - (ident 'Stdout' '.line!') - (string 'Hello, world!'))))) + (ident "Stdout" ".line!") + (string "Hello, world!"))))) ~~~FORMATTED NO CHANGE ~~~END \ No newline at end of file diff --git a/src/snapshots/if_then_else.txt b/src/snapshots/if_then_else.txt index 2a74e66e07..b27541ac84 100644 --- a/src/snapshots/if_then_else.txt +++ b/src/snapshots/if_then_else.txt @@ -15,13 +15,13 @@ KwModule,OpenSquare,LowerIdent,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,Lowe ~~~PARSE (file (module - (exposed_item (lower_ident 'foo'))) + (exposed_item (lower_ident "foo"))) (decl - (ident 'foo') + (ident "foo") (if_then_else - (ident '' 'true') - (tag 'A') - (block (tag 'B'))))) + (ident "" "true") + (tag "A") + (block (tag "B"))))) ~~~FORMATTED module [foo] diff --git a/src/snapshots/some_folder/002.txt b/src/snapshots/some_folder/002.txt index 44dc634c66..b6e45a4ad3 100644 --- a/src/snapshots/some_folder/002.txt +++ b/src/snapshots/some_folder/002.txt @@ -13,14 +13,14 @@ KwModule,OpenSquare,LowerIdent,Comma,LowerIdent,CloseSquare,Newline,LowerIdent,O ~~~PARSE (file (module - (exposed_item (lower_ident 'foo')) - (exposed_item (lower_ident 'bar'))) + (exposed_item (lower_ident "foo")) + (exposed_item (lower_ident "bar"))) (decl - (ident 'foo') - (string 'one')) + (ident "foo") + (string 
"one")) (decl - (ident 'bar') - (string 'two'))) + (ident "bar") + (string "two"))) ~~~FORMATTED NO CHANGE ~~~END \ No newline at end of file diff --git a/src/snapshots/type_annotations.txt b/src/snapshots/type_annotations.txt index a46512bc3e..0e046c0c11 100644 --- a/src/snapshots/type_annotations.txt +++ b/src/snapshots/type_annotations.txt @@ -16,37 +16,37 @@ KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpColon,UpperIdent,Newline,Lo (file (module) (type_anno - 'foo' - (tag 'U64')) + "foo" + (tag "U64")) (type_anno - 'bar' + "bar" (tag - 'Thing' - (ty_var 'a') - (ty_var 'b') + "Thing" + (ty_var "a") + (ty_var "b") (_))) (type_anno - 'baz' + "baz" (tuple - (ty_var 'a') - (ty_var 'b') - (ty_var 'c'))) + (ty_var "a") + (ty_var "b") + (ty_var "c"))) (type_anno - 'add_one' + "add_one" (fn - (tag 'U32') - (tag 'U8') - (tag 'U16'))) + (tag "U32") + (tag "U8") + (tag "U16"))) (type_anno - 'main!' + "main!" (fn (tag - 'Result' + "Result" (record) (_)) (tag - 'List' - (tag 'String'))))) + "List" + (tag "String"))))) ~~~FORMATTED NO CHANGE ~~~END \ No newline at end of file diff --git a/src/snapshots/type_declarations.txt b/src/snapshots/type_declarations.txt index fbc843cf57..96dd06837a 100644 --- a/src/snapshots/type_declarations.txt +++ b/src/snapshots/type_declarations.txt @@ -19,49 +19,49 @@ KwModule,OpenSquare,UpperIdent,Comma,UpperIdent,Comma,UpperIdent,Comma,UpperIden ~~~PARSE (file (module - (exposed_item (upper_ident 'Map')) - (exposed_item (upper_ident 'Foo')) - (exposed_item (upper_ident 'Some')) - (exposed_item (upper_ident 'Maybe')) - (exposed_item (upper_ident 'SomeFunc')) - (exposed_item (lower_ident 'add_one')) - (exposed_item (lower_ident 'main!'))) + (exposed_item (upper_ident "Map")) + (exposed_item (upper_ident "Foo")) + (exposed_item (upper_ident "Some")) + (exposed_item (upper_ident "Maybe")) + (exposed_item (upper_ident "SomeFunc")) + (exposed_item (lower_ident "add_one")) + (exposed_item (lower_ident "main!"))) (type_decl (header - 'Map' - 'a' - 'b') + "Map" + "a" + "b") (fn (tag - 'List' - (ty_var 'b')) + "List" + (ty_var "b")) (tag - 'List' - (ty_var 'a')) + "List" + (ty_var "a")) (fn - (ty_var 'b') - (ty_var 'a')))) + (ty_var "b") + (ty_var "a")))) (type_decl - (header 'Foo') + (header "Foo") (tuple - (tag 'Bar') - (tag 'Baz'))) + (tag "Bar") + (tag "Baz"))) (type_decl - (header 'Some' 'a') - (record '' '')) + (header "Some" "a") + (record "" "")) (type_decl - (header 'Maybe' 'a') - (tag_union 'TODO tags' 'TODO open_anno')) + (header "Maybe" "a") + (tag_union "TODO tags" "TODO open_anno")) (type_decl - (header 'SomeFunc' 'a') + (header "SomeFunc" "a") (fn (tag - 'Maybe' - (ty_var 'a')) + "Maybe" + (ty_var "a")) (tag - 'Maybe' - (ty_var 'a')) - (ty_var 'a')))) + "Maybe" + (ty_var "a")) + (ty_var "a")))) ~~~FORMATTED NO CHANGE ~~~END \ No newline at end of file From 000b9c2c2e9a17004478a51471796e89872a2453 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 14:48:17 +1100 Subject: [PATCH 37/40] fix broken sexpr test --- src/base/sexpr.zig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/base/sexpr.zig b/src/base/sexpr.zig index 0994a7079d..46ae9d7225 100644 --- a/src/base/sexpr.zig +++ b/src/base/sexpr.zig @@ -191,7 +191,7 @@ test "s-expression" { foo.toStringPretty(buf.writer().any()); const expected = \\(foo - \\ 'bar' + \\ "bar" \\ -123 \\ (baz 456 7.89e2)) ; From 58da88d0643afea9f6ee04f6554e2c62481b2a70 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 15:15:45 +1100 Subject: [PATCH 38/40] simplify tokenize 
problem formatting, use helpers and add unit test to correct caret positioning --- src/check/parse/tokenize.zig | 209 ++++++++++++++++++++++--------- src/snapshots/fuzz_crash_009.txt | 11 +- src/snapshots/fuzz_crash_010.txt | 17 +-- 3 files changed, 159 insertions(+), 78 deletions(-) diff --git a/src/check/parse/tokenize.zig b/src/check/parse/tokenize.zig index 76122efa6b..f2d6fa0d64 100644 --- a/src/check/parse/tokenize.zig +++ b/src/check/parse/tokenize.zig @@ -333,83 +333,55 @@ pub const Diagnostic = struct { pub fn toStr(self: Diagnostic, gpa: Allocator, source: []const u8, writer: anytype) !void { var newlines = std.ArrayList(usize).init(gpa); defer newlines.deinit(); + try newlines.append(0); + // Find all newlines in the source var pos: usize = 0; for (source) |c| { if (c == '\n') { - try newlines.append(pos); + try newlines.append(pos + 1); // Position after the newline } pos += 1; } - switch (self.tag) { - .MismatchedBrace => { - const start_line_num = lineNum(newlines, self.begin); - const start_col = self.begin - newlines.items[start_line_num]; - const end_line_num = lineNum(newlines, self.end); - const end_col = self.end - newlines.items[end_line_num]; - - const end_index = if (end_line_num + 1 < newlines.items.len) - end_line_num + 1 - else - end_line_num; - - const src = source[newlines.items[start_line_num]..newlines.items[end_index]]; - var spaces = std.ArrayList(u8).init(gpa); - defer spaces.deinit(); - for (0..start_col) |_| { - try spaces.append(' '); - } + // Get position information + const info = getDiagnosticPositionInfo(source, newlines, self.begin, self.end); - const error_message = try std.fmt.allocPrint( - gpa, - "TOKENIZE: ({d}:{d}-{d}:{d}) Expected the correct closing brace here:\n{s}\n{s}^\n", - .{ start_line_num, start_col, end_line_num, end_col, src, spaces.items }, - ); - defer gpa.free(error_message); + // Strip trailing newline for display + const display_text = if (info.line_text.len > 0 and + (info.line_text[info.line_text.len - 1] == '\n' or + info.line_text[info.line_text.len - 1] == '\r')) + info.line_text[0 .. 
info.line_text.len - 1] + else + info.line_text; - try writer.writeAll(error_message); - }, - else => { - const start_line_num = lineNum(newlines, self.begin); - const start_col = self.begin - newlines.items[start_line_num]; - const end_line_num = lineNum(newlines, self.end); - const end_col = self.end - newlines.items[end_line_num]; - - const end_index = if (end_line_num + 1 < newlines.items.len) - newlines.items[end_line_num + 1] - else - source.len; - - const line_start = newlines.items[start_line_num]; - const src_line = source[line_start..end_index]; - - var spaces = std.ArrayList(u8).init(gpa); - defer spaces.deinit(); - for (0..start_col) |_| { - try spaces.append(' '); - } + var spaces = std.ArrayList(u8).init(gpa); + defer spaces.deinit(); + for (0..info.start_col) |_| { + try spaces.append(' '); + } - var carets = std.ArrayList(u8).init(gpa); - defer carets.deinit(); - const caret_length = if (self.end > self.begin) self.end - self.begin else 1; - for (0..caret_length) |_| { - try carets.append('^'); - } + var carets = std.ArrayList(u8).init(gpa); + defer carets.deinit(); - const error_message = try std.fmt.allocPrint( - gpa, - "TOKENIZE: ({d}:{d}-{d}:{d}) {s}:\n{s}\n{s}{s}", - .{ start_line_num + 1, start_col + 1, end_line_num + 1, end_col + 1, @tagName(self.tag), src_line, spaces.items, carets.items }, - ); - defer gpa.free(error_message); - try writer.writeAll(error_message); - }, + const caret_length = if (self.end > self.begin) self.end - self.begin else 1; + for (0..caret_length) |_| { + try carets.append('^'); } + + const error_message = try std.fmt.allocPrint( + gpa, + "TOKENIZE: ({d}:{d}-{d}:{d}) {s}:\n{s}\n{s}{s}", + .{ info.start_line + 1, info.start_col + 1, info.end_line + 1, info.end_col + 1, @tagName(self.tag), display_text, spaces.items, carets.items }, + ); + defer gpa.free(error_message); + + try writer.writeAll(error_message); } }; +/// Finds the line number for a given position in the source fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { const pos_usize = @as(usize, @intCast(pos)); @@ -429,6 +401,123 @@ fn lineNum(newlines: std.ArrayList(usize), pos: u32) u32 { return lineno; } +/// Gets the column number for a position on a given line +fn columnNum(newlines: std.ArrayList(usize), line: u32, pos: u32) u32 { + const line_start: u32 = @intCast(newlines.items[line]); + return pos - line_start; +} + +/// Returns the source text for a given line +fn getLineText(source: []const u8, newlines: std.ArrayList(usize), line: u32) []const u8 { + const line_start = newlines.items[line]; + const line_end = if (line + 1 < newlines.items.len) + newlines.items[line + 1] + else + source.len; + + return source[line_start..line_end]; +} + +/// Returns the position info for a diagnostic +fn getDiagnosticPositionInfo(source: []const u8, newlines: std.ArrayList(usize), begin: u32, end: u32) struct { start_line: u32, start_col: u32, end_line: u32, end_col: u32, line_text: []const u8 } { + const start_line = lineNum(newlines, begin); + const start_col = columnNum(newlines, start_line, begin); + const end_line = lineNum(newlines, end); + const end_col = columnNum(newlines, end_line, end); + const line_text = getLineText(source, newlines, start_line); + + return .{ + .start_line = start_line, + .start_col = start_col, + .end_line = end_line, + .end_col = end_col, + .line_text = line_text, + }; +} + +test "lineNum" { + const gpa = std.testing.allocator; + var newlines = std.ArrayList(usize).init(gpa); + defer newlines.deinit(); + + // Simple test case with lines at positions 
0, 10, 20 + try newlines.append(0); + try newlines.append(10); + try newlines.append(20); + try newlines.append(30); + + try std.testing.expectEqual(@as(u32, 0), lineNum(newlines, 0)); + try std.testing.expectEqual(@as(u32, 0), lineNum(newlines, 5)); + try std.testing.expectEqual(@as(u32, 0), lineNum(newlines, 9)); + try std.testing.expectEqual(@as(u32, 1), lineNum(newlines, 10)); + try std.testing.expectEqual(@as(u32, 1), lineNum(newlines, 15)); + try std.testing.expectEqual(@as(u32, 1), lineNum(newlines, 19)); + try std.testing.expectEqual(@as(u32, 2), lineNum(newlines, 20)); + try std.testing.expectEqual(@as(u32, 2), lineNum(newlines, 25)); + try std.testing.expectEqual(@as(u32, 2), lineNum(newlines, 29)); + try std.testing.expectEqual(@as(u32, 3), lineNum(newlines, 30)); + try std.testing.expectEqual(@as(u32, 3), lineNum(newlines, 35)); +} + +test "columnNum" { + const gpa = std.testing.allocator; + var newlines = std.ArrayList(usize).init(gpa); + defer newlines.deinit(); + + try newlines.append(0); + try newlines.append(10); + try newlines.append(20); + + try std.testing.expectEqual(@as(u32, 0), columnNum(newlines, 0, 0)); + try std.testing.expectEqual(@as(u32, 5), columnNum(newlines, 0, 5)); + try std.testing.expectEqual(@as(u32, 9), columnNum(newlines, 0, 9)); + + try std.testing.expectEqual(@as(u32, 0), columnNum(newlines, 1, 10)); + try std.testing.expectEqual(@as(u32, 5), columnNum(newlines, 1, 15)); +} + +test "getLineText" { + const gpa = std.testing.allocator; + var newlines = std.ArrayList(usize).init(gpa); + defer newlines.deinit(); + + const source = "line0\nline1\nline2"; + + try newlines.append(0); + try newlines.append(6); // After "line0\n" + try newlines.append(12); // After "line1\n" + + try std.testing.expectEqualStrings("line0\n", getLineText(source, newlines, 0)); + try std.testing.expectEqualStrings("line1\n", getLineText(source, newlines, 1)); + try std.testing.expectEqualStrings("line2", getLineText(source, newlines, 2)); +} + +test "getDiagnosticPositionInfo" { + const gpa = std.testing.allocator; + var newlines = std.ArrayList(usize).init(gpa); + defer newlines.deinit(); + + const source = "line0\nline1\nline2"; + + try newlines.append(0); + try newlines.append(6); // After "line0\n" + try newlines.append(12); // After "line1\n" + + const info1 = getDiagnosticPositionInfo(source, newlines, 2, 4); // "ne" in line0 + try std.testing.expectEqual(@as(u32, 0), info1.start_line); + try std.testing.expectEqual(@as(u32, 2), info1.start_col); + try std.testing.expectEqual(@as(u32, 0), info1.end_line); + try std.testing.expectEqual(@as(u32, 4), info1.end_col); + try std.testing.expectEqualStrings("line0\n", info1.line_text); + + const info2 = getDiagnosticPositionInfo(source, newlines, 8, 10); // "ne" in line1 + try std.testing.expectEqual(@as(u32, 1), info2.start_line); + try std.testing.expectEqual(@as(u32, 2), info2.start_col); + try std.testing.expectEqual(@as(u32, 1), info2.end_line); + try std.testing.expectEqual(@as(u32, 4), info2.end_col); + try std.testing.expectEqualStrings("line1\n", info2.line_text); +} + /// The cursor is our current position in the input text, and it collects messages. /// Note that instead of allocating its own message list, the caller must pass in a pre-allocated /// slice of Message. The field `message_count` tracks how many messages have been written. 
diff --git a/src/snapshots/fuzz_crash_009.txt b/src/snapshots/fuzz_crash_009.txt index a9d46699ca..980674f056 100644 --- a/src/snapshots/fuzz_crash_009.txt +++ b/src/snapshots/fuzz_crash_009.txt @@ -8,15 +8,12 @@ foo = "onmo % ~~~PROBLEMS -TOKENIZE: (1:6-1:6) Expected the correct closing brace here: - +TOKENIZE: (2:6-2:6) MismatchedBrace: ] - ^ - -TOKENIZE: (6:7-6:13) UnclosedString: - + ^ +TOKENIZE: (6:6-6:12) UnclosedString: "onmo % - ^^^^^^ + ^^^^^^ PARSER: missing_header ~~~TOKENS LowerIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile diff --git a/src/snapshots/fuzz_crash_010.txt b/src/snapshots/fuzz_crash_010.txt index 38b5b3e4b3..cc9a9a6568 100644 --- a/src/snapshots/fuzz_crash_010.txt +++ b/src/snapshots/fuzz_crash_010.txt @@ -3,24 +3,19 @@ description=fuzz crash ~~~SOURCE H{o,  ] - foo = "on (string 'onmo %'))) ~~~PROBLEMS -TOKENIZE: (2:4-2:4) AsciiControl: - +TOKENIZE: (2:3-2:3) AsciiControl:  ] - ^ -TOKENIZE: (1:6-1:6) Expected the correct closing brace here: - + ^ +TOKENIZE: (2:6-2:6) MismatchedBrace:  ] - ^ - -TOKENIZE: (6:7-6:36) UnclosedString: - + ^ +TOKENIZE: (5:6-5:35) UnclosedString: "on (string 'onmo %'))) - ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ PARSER: missing_header ~~~TOKENS UpperIdent,OpenCurly,LowerIdent,Comma,Newline,CloseCurly,Newline,LowerIdent,OpAssign,Newline,StringStart,StringPart,EndOfFile From 680c4346a7947c69c2ed224288d2130c6cda6fd7 Mon Sep 17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 21:01:47 +1100 Subject: [PATCH 39/40] use pushMalformed --- src/check/parse/IR.zig | 4 ++-- src/check/parse/Parser.zig | 22 ++++++++++--------- src/snapshots/expr_if_missing_else.txt | 7 ++---- src/snapshots/expr_no_space_dot_int.txt | 7 ++---- src/snapshots/fuzz_crash_001.txt | 1 + src/snapshots/fuzz_crash_011.txt | 18 +++++++++++++++ .../header_expected_open_bracket.txt | 4 ++-- 7 files changed, 39 insertions(+), 24 deletions(-) create mode 100644 src/snapshots/fuzz_crash_011.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index b090358c0b..d22fa6f151 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -61,9 +61,9 @@ pub const Diagnostic = struct { expected_package_or_platform_string, expected_package_platform_close_curly, expect_closing_paren, - header_expected_open_bracket, + header_expected_open_square, + header_expected_close_square, header_unexpected_token, - header_expected_close_bracket, pattern_unexpected_token, ty_anno_unexpected_token, statement_unexpected_eof, diff --git a/src/check/parse/Parser.zig b/src/check/parse/Parser.zig index d12325b81d..495bb25474 100644 --- a/src/check/parse/Parser.zig +++ b/src/check/parse/Parser.zig @@ -217,14 +217,16 @@ fn parseModuleHeader(self: *Parser) IR.NodeStore.HeaderIdx { // Get exposes self.expect(.OpenSquare) catch { - return self.store.addMalformed(IR.NodeStore.HeaderIdx, .header_expected_open_bracket, self.pos); + return self.pushMalformed(IR.NodeStore.HeaderIdx, .header_expected_open_square); }; const scratch_top = self.store.scratchExposedItemTop(); self.parseCollectionSpan(IR.NodeStore.ExposedItemIdx, .CloseSquare, IR.NodeStore.addScratchExposedItem, Parser.parseExposedItem) catch { while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { self.advance(); } - self.expect(.CloseSquare) catch {}; + self.expect(.CloseSquare) catch { + return self.pushMalformed(IR.NodeStore.HeaderIdx, .header_expected_close_square); + }; self.store.clearScratchExposedItemsFrom(scratch_top); 
return self.pushMalformed(IR.NodeStore.HeaderIdx, .import_exposing_no_close); }; @@ -255,9 +257,10 @@ pub fn parseAppHeader(self: *Parser) IR.NodeStore.HeaderIdx { while (self.peek() != .CloseSquare and self.peek() != .EndOfFile) { self.advance(); } - self.expect(.CloseSquare) catch {}; + self.expect(.CloseSquare) catch { + return self.pushMalformed(IR.NodeStore.HeaderIdx, .header_expected_close_square); + }; self.store.clearScratchExposedItemsFrom(scratch_top); - return self.pushMalformed(IR.NodeStore.HeaderIdx, .import_exposing_no_close); }; const provides = self.store.exposedItemSpanFrom(scratch_top); @@ -271,7 +274,6 @@ pub fn parseAppHeader(self: *Parser) IR.NodeStore.HeaderIdx { const entry_start = self.pos; if (self.peek() != .LowerIdent) { self.store.clearScratchRecordFieldsFrom(fields_scratch_top); - std.debug.print("GOT {}", .{self.peek()}); return self.pushMalformed(IR.NodeStore.HeaderIdx, .expected_package_or_platform_name); } const name_tok = self.pos; @@ -719,7 +721,7 @@ pub fn parsePattern(self: *Parser, alternatives: Alternatives) IR.NodeStore.Patt } }); }, else => { - return self.store.addMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token, self.pos); + return self.pushMalformed(IR.NodeStore.PatternIdx, .pattern_unexpected_token); }, } @@ -945,7 +947,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { const condition = self.parseExpr(); const then = self.parseExpr(); if (self.peek() != .KwElse) { - return self.store.addMalformed(IR.NodeStore.ExprIdx, .expr_if_missing_else, self.pos); + return self.pushMalformed(IR.NodeStore.ExprIdx, .expr_if_missing_else); } self.advance(); const else_idx = self.parseExpr(); @@ -1004,7 +1006,7 @@ pub fn parseExprWithBp(self: *Parser, min_bp: u8) IR.NodeStore.ExprIdx { while (self.peek() == .NoSpaceDotInt or self.peek() == .NoSpaceDotLowerIdent) { const tok = self.peek(); if (tok == .NoSpaceDotInt) { - return self.store.addMalformed(IR.NodeStore.ExprIdx, .expr_no_space_dot_int, self.pos); + return self.pushMalformed(IR.NodeStore.ExprIdx, .expr_no_space_dot_int); } else { // NoSpaceDotLowerIdent const s = self.pos; const ident = self.store.addExpr(.{ .ident = .{ @@ -1144,7 +1146,7 @@ pub fn parseStringExpr(self: *Parser) IR.NodeStore.ExprIdx { }, else => { // Something is broken in the tokenizer if we get here! 
- return self.store.addMalformed(IR.NodeStore.ExprIdx, .string_unexpected_token, self.pos); + return self.pushMalformed(IR.NodeStore.ExprIdx, .string_unexpected_token); }, } } @@ -1321,7 +1323,7 @@ pub fn parseTypeAnno(self: *Parser, looking_for_args: TyFnArgs) IR.NodeStore.Typ self.advance(); // Advance past Underscore }, else => { - return self.store.addMalformed(IR.NodeStore.TypeAnnoIdx, .ty_anno_unexpected_token, self.pos); + return self.pushMalformed(IR.NodeStore.TypeAnnoIdx, .ty_anno_unexpected_token); }, } diff --git a/src/snapshots/expr_if_missing_else.txt b/src/snapshots/expr_if_missing_else.txt index ae871f0def..97310513a7 100644 --- a/src/snapshots/expr_if_missing_else.txt +++ b/src/snapshots/expr_if_missing_else.txt @@ -5,7 +5,7 @@ module [] foo = if tru then 0 ~~~PROBLEMS -NIL +PARSER: expr_if_missing_else ~~~TOKENS KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,LowerIdent,Int,EndOfFile ~~~PARSE @@ -13,12 +13,9 @@ KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,KwIf,LowerIdent,Lowe (module) (decl (ident "foo") - (malformed_expr "expr_if_missing_else")) - (int "0")) + (malformed_expr "expr_if_missing_else"))) ~~~FORMATTED module [] foo = - -0 ~~~END \ No newline at end of file diff --git a/src/snapshots/expr_no_space_dot_int.txt b/src/snapshots/expr_no_space_dot_int.txt index d8bdbb05ac..e58581c397 100644 --- a/src/snapshots/expr_no_space_dot_int.txt +++ b/src/snapshots/expr_no_space_dot_int.txt @@ -5,7 +5,7 @@ module [] foo = asd.0 ~~~PROBLEMS -PARSER: unexpected_token +PARSER: expr_no_space_dot_int ~~~TOKENS KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDotInt,EndOfFile ~~~PARSE @@ -13,12 +13,9 @@ KwModule,OpenSquare,CloseSquare,Newline,LowerIdent,OpAssign,LowerIdent,NoSpaceDo (module) (decl (ident "foo") - (malformed_expr "expr_no_space_dot_int")) - (malformed_expr "unexpected_token")) + (malformed_expr "expr_no_space_dot_int"))) ~~~FORMATTED module [] foo = - - ~~~END \ No newline at end of file diff --git a/src/snapshots/fuzz_crash_001.txt b/src/snapshots/fuzz_crash_001.txt index 36cd07661c..1ad5fe3b94 100644 --- a/src/snapshots/fuzz_crash_001.txt +++ b/src/snapshots/fuzz_crash_001.txt @@ -4,6 +4,7 @@ description=fuzz crash mo|% ~~~PROBLEMS PARSER: missing_header +PARSER: pattern_unexpected_token PARSER: unexpected_token ~~~TOKENS LowerIdent,OpBar,OpPercent,EndOfFile diff --git a/src/snapshots/fuzz_crash_011.txt b/src/snapshots/fuzz_crash_011.txt new file mode 100644 index 0000000000..8ac5068116 --- /dev/null +++ b/src/snapshots/fuzz_crash_011.txt @@ -0,0 +1,18 @@ +~~~META +description=fuzz crash +~~~SOURCE +module P]F +~~~PROBLEMS +TOKENIZE: (1:9-1:9) OverClosedBrace: +module P]F + ^ +PARSER: header_expected_open_square +~~~TOKENS +KwModule,UpperIdent,UpperIdent,EndOfFile +~~~PARSE +(file + (malformed_header "header_expected_open_square") + (tag "F")) +~~~FORMATTED +F +~~~END \ No newline at end of file diff --git a/src/snapshots/header_expected_open_bracket.txt b/src/snapshots/header_expected_open_bracket.txt index ef3584caa7..69bd3776ef 100644 --- a/src/snapshots/header_expected_open_bracket.txt +++ b/src/snapshots/header_expected_open_bracket.txt @@ -3,11 +3,11 @@ description= ~~~SOURCE module ~~~PROBLEMS -NIL +PARSER: header_expected_open_square ~~~TOKENS KwModule,EndOfFile ~~~PARSE -(file (malformed_header "header_expected_open_bracket")) +(file (malformed_header "header_expected_open_square")) ~~~FORMATTED ~~~END \ No newline at end of file From bbf54655eb8fcb519388677d6f87b219b9ac1d54 Mon Sep 
17 00:00:00 2001 From: Luke Boswell Date: Tue, 11 Mar 2025 21:15:18 +1100 Subject: [PATCH 40/40] disable assertion causing crash --- src/check/parse/IR.zig | 4 +++- src/snapshots/fuzz_crash_012.txt | 20 ++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 src/snapshots/fuzz_crash_012.txt diff --git a/src/check/parse/IR.zig b/src/check/parse/IR.zig index d22fa6f151..d581c474cf 100644 --- a/src/check/parse/IR.zig +++ b/src/check/parse/IR.zig @@ -814,7 +814,9 @@ pub const NodeStore = struct { node.tag = .underscore_patt; }, .alternatives => |a| { - std.debug.assert(a.patterns.span.len > 1); + // disabled because it was hit by a fuzz test + // for a repro see src/snapshots/fuzz_crash_012.txt + // std.debug.assert(a.patterns.span.len > 1); node.tag = .alternatives_patt; node.data.lhs = a.patterns.span.start; node.data.rhs = a.patterns.span.len; diff --git a/src/snapshots/fuzz_crash_012.txt b/src/snapshots/fuzz_crash_012.txt new file mode 100644 index 0000000000..9da03b2920 --- /dev/null +++ b/src/snapshots/fuzz_crash_012.txt @@ -0,0 +1,20 @@ +~~~META +description=fuzz crash +~~~SOURCE + ||(|(l888888888| +~~~PROBLEMS +PARSER: missing_header +PARSER: unexpected_token +PARSER: pattern_unexpected_token +PARSER: unexpected_token +~~~TOKENS +OpOr,NoSpaceOpenRound,OpBar,NoSpaceOpenRound,LowerIdent,OpBar,EndOfFile +~~~PARSE +(file + (malformed_header "missing_header") + (malformed_expr "unexpected_token") + (malformed_expr "unexpected_token")) +~~~FORMATTED + + +~~~END \ No newline at end of file