gopls/internal/lsp/cache/parsego: clamp positions when fixing statements

findleyr · gopherbot · commit 79d4e32e15c5 · 2023-12-01T21:21:27.000Z
In golang/go#64488, we observe how a seemingly innocuous change to postfix completion tests led to significant flakiness in our integration tests, which started to encounter a new bug.Report for out-of-bounds positions on type checker errors.

The root cause is that the new syntax of the test data triggered AST fixes (i.e. parsego.fixAST) that overflowed the original file. The failure was not deterministic because the postfix snippet tests do not wait for diagnostics: adding an env.AfterChange() to the end of the test body made the failure deterministic.

Additionally, while investigating I encountered and fixed a clear bug that fixes (and therefore parsego.File.Fixed()) were not correctly counted. This was incidental, and did not contribute to the test failures. We don't actually use Fixed() very much, though we probably should consider it more.

To fix the underlying bug, clamp positions to the token.File. This is definitely unsatisfactory, but after an hour of tracing through the fix logic, I am hesitant to commit to a more principled yet risky change. This logic is rather tricky and clearly contains a lot of embedded knowledge. It's probably best to leave it alone and redirect efforts to improved parser recovery.

This is strong evidence that at some point we do need to carefully scrutinize the AST fixes (see also golang/go#64335).

Updates golang/go#64335
Fixes golang/go#64488

Change-Id: I70a33c0c9aae66baae78e6474ee56cdaa25e45f4
Reviewed-on: https://go-review.googlesource.com/c/tools/+/546655
Auto-Submit: Robert Findley <rfindley@google.com>
LUCI-TryBot-Result: Go LUCI <golang-scoped@luci-project-accounts.iam.gserviceaccount.com>
Reviewed-by: Alan Donovan <adonovan@google.com>
diff --git a/gopls/internal/lsp/cache/check.go b/gopls/internal/lsp/cache/check.go
@@ -1806,7 +1806,12 @@ func typeErrorsToDiagnostics(pkg *syntaxPackage, errs []types.Error, linkTarget
 			if !posn.IsValid() {
 				// All valid positions produced by the type checker should described by
 				// its fileset.
-				bug.Reportf("internal error: type checker error %v not outside its Fset", e)
+				//
+				// Note: in golang/go#64488, we observed an error that was positioned
+				// over fixed syntax, which overflowed its file. So it's definitely
+				// possible that we get here (it's hard to reason about fixing up the
+				// AST). Nevertheless, it's a bug.
+				bug.Reportf("internal error: type checker error %q outside its Fset", e)
 				continue
 			}
 			pgf, err := pkg.File(protocol.URIFromPath(posn.Filename))
diff --git a/gopls/internal/lsp/cache/parsego/file.go b/gopls/internal/lsp/cache/parsego/file.go
@@ -24,30 +24,27 @@ type File struct {
 	// actual content of the file if we have fixed the AST.
 	Src []byte
 
-	// FixedSrc and Fixed AST report on "fixing" that occurred during parsing of
+	// fixedSrc and fixedAST report on "fixing" that occurred during parsing of
 	// this file.
 	//
-	// If FixedSrc == true, the source contained in the Src field was modified
-	// from the original source to improve parsing.
-	//
-	// If FixedAST == true, the ast was modified after parsing, and therefore
-	// positions encoded in the AST may not accurately represent the content of
-	// the Src field.
+	// fixedSrc means Src holds file content that was modified to improve parsing.
+	// fixedAST means File was modified after parsing, so AST positions may not
+	// reflect the content of Src.
 	//
 	// TODO(rfindley): there are many places where we haphazardly use the Src or
 	// positions without checking these fields. Audit these places and guard
 	// accordingly. After doing so, we may find that we don't need to
-	// differentiate FixedSrc and FixedAST.
-	FixedSrc bool
-	FixedAST bool
+	// differentiate fixedSrc and fixedAST.
+	fixedSrc bool
+	fixedAST bool
 	Mapper   *protocol.Mapper // may map fixed Src, not file content
 	ParseErr scanner.ErrorList
 }
 
 // Fixed reports whether p was "Fixed", meaning that its source or positions
 // may not correlate with the original file.
 func (p File) Fixed() bool {
-	return p.FixedSrc || p.FixedAST
+	return p.fixedSrc || p.fixedAST
 }
 
 // -- go/token domain convenience helpers --
diff --git a/gopls/internal/lsp/cache/parsego/parse.go b/gopls/internal/lsp/cache/parsego/parse.go
@@ -67,7 +67,7 @@ func Parse(ctx context.Context, fset *token.FileSet, uri protocol.DocumentURI, s
 	if parseErr != nil {
 		// Fix any badly parsed parts of the AST.
 		astFixes := fixAST(file, tok, src)
-		fixedAST = len(fixes) > 0
+		fixedAST = len(astFixes) > 0
 		if fixedAST {
 			fixes = append(fixes, astFixes...)
 		}
@@ -119,8 +119,8 @@ func Parse(ctx context.Context, fset *token.FileSet, uri protocol.DocumentURI, s
 		URI:      uri,
 		Mode:     mode,
 		Src:      src,
-		FixedSrc: fixedSrc,
-		FixedAST: fixedAST,
+		fixedSrc: fixedSrc,
+		fixedAST: fixedAST,
 		File:     file,
 		Tok:      tok,
 		Mapper:   protocol.NewMapper(uri, src),
@@ -519,7 +519,7 @@ func fixInitStmt(bad *ast.BadExpr, parent ast.Node, tok *token.File, src []byte)
 		return false
 	}
 	stmtBytes := src[start : end+1]
-	stmt, err := parseStmt(bad.Pos(), stmtBytes)
+	stmt, err := parseStmt(tok, bad.Pos(), stmtBytes)
 	if err != nil {
 		return false
 	}
@@ -621,7 +621,7 @@ func fixArrayType(bad *ast.BadExpr, parent ast.Node, tok *token.File, src []byte
 	// literal to be parseable.
 	exprBytes = append(exprBytes, '{', '}')
 
-	expr, err := parseExpr(from, exprBytes)
+	expr, err := parseExpr(tok, from, exprBytes)
 	if err != nil {
 		return false
 	}
@@ -786,7 +786,7 @@ FindTo:
 		exprBytes = append(exprBytes, '_')
 	}
 
-	expr, err := parseExpr(from, exprBytes)
+	expr, err := parseExpr(tok, from, exprBytes)
 	if err != nil {
 		return false
 	}
@@ -811,7 +811,10 @@ FindTo:
 
 // parseStmt parses the statement in src and updates its position to
 // start at pos.
-func parseStmt(pos token.Pos, src []byte) (ast.Stmt, error) {
+//
+// tok is the original file containing pos. Used to ensure that all adjusted
+// positions are valid.
+func parseStmt(tok *token.File, pos token.Pos, src []byte) (ast.Stmt, error) {
 	// Wrap our expression to make it a valid Go file we can pass to ParseFile.
 	fileSrc := bytes.Join([][]byte{
 		[]byte("package fake;func _(){"),
@@ -840,15 +843,15 @@ func parseStmt(pos token.Pos, src []byte) (ast.Stmt, error) {
 
 	// parser.ParseFile returns undefined positions.
 	// Adjust them for the current file.
-	offsetPositions(stmt, pos-1-(stmt.Pos()-1))
+	offsetPositions(tok, stmt, pos-1-(stmt.Pos()-1))
 
 	return stmt, nil
 }
 
 // parseExpr parses the expression in src and updates its position to
 // start at pos.
-func parseExpr(pos token.Pos, src []byte) (ast.Expr, error) {
-	stmt, err := parseStmt(pos, src)
+func parseExpr(tok *token.File, pos token.Pos, src []byte) (ast.Expr, error) {
+	stmt, err := parseStmt(tok, pos, src)
 	if err != nil {
 		return nil, err
 	}
@@ -864,7 +867,9 @@ func parseExpr(pos token.Pos, src []byte) (ast.Expr, error) {
 var tokenPosType = reflect.TypeOf(token.NoPos)
 
 // offsetPositions applies an offset to the positions in an ast.Node.
-func offsetPositions(n ast.Node, offset token.Pos) {
+func offsetPositions(tok *token.File, n ast.Node, offset token.Pos) {
+	fileBase := int64(tok.Base())
+	fileEnd := fileBase + int64(tok.Size())
 	ast.Inspect(n, func(n ast.Node) bool {
 		if n == nil {
 			return false
@@ -889,7 +894,18 @@ func offsetPositions(n ast.Node, offset token.Pos) {
 					continue
 				}
 
-				f.SetInt(f.Int() + int64(offset))
+				// Clamp value to valid range; see #64335.
+				//
+				// TODO(golang/go#64335): this is a hack, because our fixes should not
+				// produce positions that overflow (but they do: golang/go#64488).
+				pos := f.Int() + int64(offset)
+				if pos < fileBase {
+					pos = fileBase
+				}
+				if pos > fileEnd {
+					pos = fileEnd
+				}
+				f.SetInt(pos)
 			}
 		}
 
diff --git a/gopls/internal/lsp/cache/parsego/parse_test.go b/gopls/internal/lsp/cache/parsego/parse_test.go
@@ -0,0 +1,46 @@
+// Copyright 2023 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package parsego_test
+
+import (
+	"context"
+	"go/ast"
+	"go/token"
+	"testing"
+
+	"golang.org/x/tools/gopls/internal/lsp/cache/parsego"
+	"golang.org/x/tools/gopls/internal/util/safetoken"
+	"golang.org/x/tools/internal/tokeninternal"
+)
+
+// TODO(golang/go#64335): we should have many more tests for fixed syntax.
+
+func TestFixPosition_Issue64488(t *testing.T) {
+	// This test reproduces the conditions of golang/go#64488, where a type error
+	// on fixed syntax overflows the token.File.
+	const src = `
+package foo
+
+func _() {
+	type myThing struct{}
+	var foo []myThing
+	for ${1:}, ${2:} := range foo {
+	$0
+}
+}
+`
+
+	pgf, _ := parsego.Parse(context.Background(), token.NewFileSet(), "file://foo.go", []byte(src), parsego.ParseFull, false)
+	fset := tokeninternal.FileSetFor(pgf.Tok)
+	ast.Inspect(pgf.File, func(n ast.Node) bool {
+		if n != nil {
+			posn := safetoken.StartPosition(fset, n.Pos())
+			if !posn.IsValid() {
+				t.Fatalf("invalid position for %T (%v): %v not in [%d, %d]", n, n, n.Pos(), pgf.Tok.Base(), pgf.Tok.Base()+pgf.Tok.Size())
+			}
+		}
+		return true
+	})
+}