diff --git a/go/analysis/passes/printf/printf.go b/go/analysis/passes/printf/printf.go index 95c4bbaa98a..b95e2fd6f1a 100644 --- a/go/analysis/passes/printf/printf.go +++ b/go/analysis/passes/printf/printf.go @@ -5,7 +5,6 @@ package printf import ( - "bytes" _ "embed" "fmt" "go/ast" @@ -15,9 +14,7 @@ import ( "reflect" "regexp" "sort" - "strconv" "strings" - "unicode/utf8" "golang.org/x/tools/go/analysis" "golang.org/x/tools/go/analysis/passes/inspect" @@ -25,6 +22,7 @@ import ( "golang.org/x/tools/go/ast/inspector" "golang.org/x/tools/go/types/typeutil" "golang.org/x/tools/internal/analysisinternal" + "golang.org/x/tools/internal/fmtstr" "golang.org/x/tools/internal/typeparams" ) @@ -421,9 +419,9 @@ func checkCall(pass *analysis.Pass) { fn, kind := printfNameAndKind(pass, call) switch kind { case KindPrintf, KindErrorf: - checkPrintf(pass, kind, call, fn) + checkPrintf(pass, kind, call, fn.FullName()) case KindPrint: - checkPrint(pass, call, fn) + checkPrint(pass, call, fn.FullName()) } }) } @@ -485,26 +483,8 @@ func isFormatter(typ types.Type) bool { types.Identical(sig.Params().At(1).Type(), types.Typ[types.Rune]) } -// formatState holds the parsed representation of a printf directive such as "%3.*[4]d". -// It is constructed by parsePrintfVerb. -type formatState struct { - verb rune // the format verb: 'd' for "%d" - format string // the full format directive from % through verb, "%.3d". - name string // Printf, Sprintf etc. - flags []byte // the list of # + etc. - argNums []int // the successive argument numbers that are consumed, adjusted to refer to actual arg in call - firstArg int // Index of first argument after the format in the Printf call. - // Used only during parse. - pass *analysis.Pass - call *ast.CallExpr - argNum int // Which argument we're expecting to format now. - hasIndex bool // Whether the argument is indexed. - indexPending bool // Whether we have an indexed argument that has not resolved. - nbytes int // number of bytes of the format string consumed. -} - // checkPrintf checks a call to a formatted print routine such as Printf. -func checkPrintf(pass *analysis.Pass, kind Kind, call *ast.CallExpr, fn *types.Func) { +func checkPrintf(pass *analysis.Pass, kind Kind, call *ast.CallExpr, name string) { idx := formatStringIndex(pass, call) if idx < 0 || idx >= len(call.Args) { return @@ -523,7 +503,7 @@ func checkPrintf(pass *analysis.Pass, kind Kind, call *ast.CallExpr, fn *types.F Pos: formatArg.Pos(), End: formatArg.End(), Message: fmt.Sprintf("non-constant format string in call to %s", - fn.FullName()), + name), SuggestedFixes: []analysis.SuggestedFix{{ Message: `Insert "%s" format string`, TextEdits: []analysis.TextEdit{{ @@ -540,49 +520,46 @@ func checkPrintf(pass *analysis.Pass, kind Kind, call *ast.CallExpr, fn *types.F firstArg := idx + 1 // Arguments are immediately after format string. if !strings.Contains(format, "%") { if len(call.Args) > firstArg { - pass.Reportf(call.Lparen, "%s call has arguments but no formatting directives", fn.FullName()) + pass.Reportf(call.Lparen, "%s call has arguments but no formatting directives", name) } return } - // Hard part: check formats against args. - argNum := firstArg - maxArgNum := firstArg + + // Pass the string constant value so + // fmt.Sprintf("%"+("s"), "hi", 3) can be reported as + // "fmt.Sprintf call needs 1 arg but has 2 args". + operations, err := fmtstr.Parse(format, idx) + if err != nil { + // All error messages are in predicate form ("call has a problem") + // so that they may be affixed into a subject ("log.Printf "). + pass.ReportRangef(call.Args[idx], "%s %s", name, err) + return + } + + // index of the highest used index. + maxArgIndex := firstArg - 1 anyIndex := false - for i, w := 0, 0; i < len(format); i += w { - w = 1 - if format[i] != '%' { - continue - } - state := parsePrintfVerb(pass, call, fn.FullName(), format[i:], firstArg, argNum) - if state == nil { - return + // Check formats against args. + for _, operation := range operations { + if operation.Prec.Index != -1 || + operation.Width.Index != -1 || + operation.Verb.Index != -1 { + anyIndex = true } - w = len(state.format) - if !okPrintfArg(pass, call, state) { // One error per format is enough. + if !okPrintfArg(pass, call, &maxArgIndex, firstArg, name, operation) { + // One error per format is enough. return } - if state.hasIndex { - anyIndex = true - } - if state.verb == 'w' { + if operation.Verb.Verb == 'w' { switch kind { case KindNone, KindPrint, KindPrintf: - pass.Reportf(call.Pos(), "%s does not support error-wrapping directive %%w", state.name) + pass.Reportf(call.Pos(), "%s does not support error-wrapping directive %%w", name) return } } - if len(state.argNums) > 0 { - // Continue with the next sequential argument. - argNum = state.argNums[len(state.argNums)-1] + 1 - } - for _, n := range state.argNums { - if n >= maxArgNum { - maxArgNum = n + 1 - } - } } // Dotdotdot is hard. - if call.Ellipsis.IsValid() && maxArgNum >= len(call.Args)-1 { + if call.Ellipsis.IsValid() && maxArgIndex >= len(call.Args)-2 { return } // If any formats are indexed, extra arguments are ignored. @@ -590,147 +567,13 @@ func checkPrintf(pass *analysis.Pass, kind Kind, call *ast.CallExpr, fn *types.F return } // There should be no leftover arguments. - if maxArgNum != len(call.Args) { - expect := maxArgNum - firstArg + if maxArgIndex+1 < len(call.Args) { + expect := maxArgIndex + 1 - firstArg numArgs := len(call.Args) - firstArg - pass.ReportRangef(call, "%s call needs %v but has %v", fn.FullName(), count(expect, "arg"), count(numArgs, "arg")) - } -} - -// parseFlags accepts any printf flags. -func (s *formatState) parseFlags() { - for s.nbytes < len(s.format) { - switch c := s.format[s.nbytes]; c { - case '#', '0', '+', '-', ' ': - s.flags = append(s.flags, c) - s.nbytes++ - default: - return - } + pass.ReportRangef(call, "%s call needs %v but has %v", name, count(expect, "arg"), count(numArgs, "arg")) } } -// scanNum advances through a decimal number if present. -func (s *formatState) scanNum() { - for ; s.nbytes < len(s.format); s.nbytes++ { - c := s.format[s.nbytes] - if c < '0' || '9' < c { - return - } - } -} - -// parseIndex scans an index expression. It returns false if there is a syntax error. -func (s *formatState) parseIndex() bool { - if s.nbytes == len(s.format) || s.format[s.nbytes] != '[' { - return true - } - // Argument index present. - s.nbytes++ // skip '[' - start := s.nbytes - s.scanNum() - ok := true - if s.nbytes == len(s.format) || s.nbytes == start || s.format[s.nbytes] != ']' { - ok = false // syntax error is either missing "]" or invalid index. - s.nbytes = strings.Index(s.format[start:], "]") - if s.nbytes < 0 { - s.pass.ReportRangef(s.call, "%s format %s is missing closing ]", s.name, s.format) - return false - } - s.nbytes = s.nbytes + start - } - arg32, err := strconv.ParseInt(s.format[start:s.nbytes], 10, 32) - if err != nil || !ok || arg32 <= 0 || arg32 > int64(len(s.call.Args)-s.firstArg) { - s.pass.ReportRangef(s.call, "%s format has invalid argument index [%s]", s.name, s.format[start:s.nbytes]) - return false - } - s.nbytes++ // skip ']' - arg := int(arg32) - arg += s.firstArg - 1 // We want to zero-index the actual arguments. - s.argNum = arg - s.hasIndex = true - s.indexPending = true - return true -} - -// parseNum scans a width or precision (or *). It returns false if there's a bad index expression. -func (s *formatState) parseNum() bool { - if s.nbytes < len(s.format) && s.format[s.nbytes] == '*' { - if s.indexPending { // Absorb it. - s.indexPending = false - } - s.nbytes++ - s.argNums = append(s.argNums, s.argNum) - s.argNum++ - } else { - s.scanNum() - } - return true -} - -// parsePrecision scans for a precision. It returns false if there's a bad index expression. -func (s *formatState) parsePrecision() bool { - // If there's a period, there may be a precision. - if s.nbytes < len(s.format) && s.format[s.nbytes] == '.' { - s.flags = append(s.flags, '.') // Treat precision as a flag. - s.nbytes++ - if !s.parseIndex() { - return false - } - if !s.parseNum() { - return false - } - } - return true -} - -// parsePrintfVerb looks the formatting directive that begins the format string -// and returns a formatState that encodes what the directive wants, without looking -// at the actual arguments present in the call. The result is nil if there is an error. -func parsePrintfVerb(pass *analysis.Pass, call *ast.CallExpr, name, format string, firstArg, argNum int) *formatState { - state := &formatState{ - format: format, - name: name, - flags: make([]byte, 0, 5), - argNum: argNum, - argNums: make([]int, 0, 1), - nbytes: 1, // There's guaranteed to be a percent sign. - firstArg: firstArg, - pass: pass, - call: call, - } - // There may be flags. - state.parseFlags() - // There may be an index. - if !state.parseIndex() { - return nil - } - // There may be a width. - if !state.parseNum() { - return nil - } - // There may be a precision. - if !state.parsePrecision() { - return nil - } - // Now a verb, possibly prefixed by an index (which we may already have). - if !state.indexPending && !state.parseIndex() { - return nil - } - if state.nbytes == len(state.format) { - pass.ReportRangef(call.Fun, "%s format %s is missing verb at end of string", name, state.format) - return nil - } - verb, w := utf8.DecodeRuneInString(state.format[state.nbytes:]) - state.verb = verb - state.nbytes += w - if verb != '%' { - state.argNums = append(state.argNums, state.argNum) - } - state.format = state.format[:state.nbytes] - return state -} - // printfArgType encodes the types of expressions a printf verb accepts. It is a bitmask. type printfArgType int @@ -791,79 +634,96 @@ var printVerbs = []printVerb{ {'X', sharpNumFlag, argRune | argInt | argString | argPointer | argFloat | argComplex}, } -// okPrintfArg compares the formatState to the arguments actually present, -// reporting any discrepancies it can discern. If the final argument is ellipsissed, -// there's little it can do for that. -func okPrintfArg(pass *analysis.Pass, call *ast.CallExpr, state *formatState) (ok bool) { +// okPrintfArg compares the operation to the arguments actually present, +// reporting any discrepancies it can discern, maxArgIndex was the index of the highest used index. +// If the final argument is ellipsissed, there's little it can do for that. +func okPrintfArg(pass *analysis.Pass, call *ast.CallExpr, maxArgIndex *int, firstArg int, name string, operation *fmtstr.Operation) (ok bool) { + verb := operation.Verb.Verb var v printVerb found := false // Linear scan is fast enough for a small list. for _, v = range printVerbs { - if v.verb == state.verb { + if v.verb == verb { found = true break } } - // Could current arg implement fmt.Formatter? + // Could verb's arg implement fmt.Formatter? // Skip check for the %w verb, which requires an error. formatter := false - if v.typ != argError && state.argNum < len(call.Args) { - if tv, ok := pass.TypesInfo.Types[call.Args[state.argNum]]; ok { + if v.typ != argError && operation.Verb.ArgIndex < len(call.Args) { + if tv, ok := pass.TypesInfo.Types[call.Args[operation.Verb.ArgIndex]]; ok { formatter = isFormatter(tv.Type) } } if !formatter { if !found { - pass.ReportRangef(call, "%s format %s has unknown verb %c", state.name, state.format, state.verb) + pass.ReportRangef(call, "%s format %s has unknown verb %c", name, operation.Text, verb) return false } - for _, flag := range state.flags { + for _, flag := range operation.Flags { // TODO: Disable complaint about '0' for Go 1.10. To be fixed properly in 1.11. // See issues 23598 and 23605. if flag == '0' { continue } if !strings.ContainsRune(v.flags, rune(flag)) { - pass.ReportRangef(call, "%s format %s has unrecognized flag %c", state.name, state.format, flag) + pass.ReportRangef(call, "%s format %s has unrecognized flag %c", name, operation.Text, flag) return false } } } - // Verb is good. If len(state.argNums)>trueArgs, we have something like %.*s and all - // but the final arg must be an integer. - trueArgs := 1 - if state.verb == '%' { - trueArgs = 0 + + var argIndexes []int + // First check for *. + if operation.Width.Dynamic != -1 { + argIndexes = append(argIndexes, operation.Width.Dynamic) + } + if operation.Prec.Dynamic != -1 { + argIndexes = append(argIndexes, operation.Prec.Dynamic) } - nargs := len(state.argNums) - for i := 0; i < nargs-trueArgs; i++ { - argNum := state.argNums[i] - if !argCanBeChecked(pass, call, i, state) { + // If len(argIndexes)>0, we have something like %.*s and all + // indexes in argIndexes must be an integer. + for _, argIndex := range argIndexes { + if !argCanBeChecked(pass, call, argIndex, firstArg, operation, name) { return } - arg := call.Args[argNum] + arg := call.Args[argIndex] if reason, ok := matchArgType(pass, argInt, arg); !ok { details := "" if reason != "" { details = " (" + reason + ")" } - pass.ReportRangef(call, "%s format %s uses non-int %s%s as argument of *", state.name, state.format, analysisinternal.Format(pass.Fset, arg), details) + pass.ReportRangef(call, "%s format %s uses non-int %s%s as argument of *", name, operation.Text, analysisinternal.Format(pass.Fset, arg), details) return false } } - if state.verb == '%' || formatter { + // Collect to update maxArgNum in one loop. + if operation.Verb.ArgIndex != -1 && verb != '%' { + argIndexes = append(argIndexes, operation.Verb.ArgIndex) + } + for _, index := range argIndexes { + *maxArgIndex = max(*maxArgIndex, index) + } + + // Special case for '%', go will print "fmt.Printf("%10.2%%dhello", 4)" + // as "%4hello", discard any runes between the two '%'s, and treat the verb '%' + // as an ordinary rune, so early return to skip the type check. + if verb == '%' || formatter { return true } - argNum := state.argNums[len(state.argNums)-1] - if !argCanBeChecked(pass, call, len(state.argNums)-1, state) { + + // Now check verb's type. + verbArgIndex := operation.Verb.ArgIndex + if !argCanBeChecked(pass, call, verbArgIndex, firstArg, operation, name) { return false } - arg := call.Args[argNum] - if isFunctionValue(pass, arg) && state.verb != 'p' && state.verb != 'T' { - pass.ReportRangef(call, "%s format %s arg %s is a func value, not called", state.name, state.format, analysisinternal.Format(pass.Fset, arg)) + arg := call.Args[verbArgIndex] + if isFunctionValue(pass, arg) && verb != 'p' && verb != 'T' { + pass.ReportRangef(call, "%s format %s arg %s is a func value, not called", name, operation.Text, analysisinternal.Format(pass.Fset, arg)) return false } if reason, ok := matchArgType(pass, v.typ, arg); !ok { @@ -875,12 +735,12 @@ func okPrintfArg(pass *analysis.Pass, call *ast.CallExpr, state *formatState) (o if reason != "" { details = " (" + reason + ")" } - pass.ReportRangef(call, "%s format %s has arg %s of wrong type %s%s", state.name, state.format, analysisinternal.Format(pass.Fset, arg), typeString, details) + pass.ReportRangef(call, "%s format %s has arg %s of wrong type %s%s", name, operation.Text, analysisinternal.Format(pass.Fset, arg), typeString, details) return false } - if v.typ&argString != 0 && v.verb != 'T' && !bytes.Contains(state.flags, []byte{'#'}) { + if v.typ&argString != 0 && v.verb != 'T' && !strings.Contains(operation.Flags, "#") { if methodName, ok := recursiveStringer(pass, arg); ok { - pass.ReportRangef(call, "%s format %s with arg %s causes recursive %s method call", state.name, state.format, analysisinternal.Format(pass.Fset, arg), methodName) + pass.ReportRangef(call, "%s format %s with arg %s causes recursive %s method call", name, operation.Text, analysisinternal.Format(pass.Fset, arg), methodName) return false } } @@ -964,25 +824,24 @@ func isFunctionValue(pass *analysis.Pass, e ast.Expr) bool { // argCanBeChecked reports whether the specified argument is statically present; // it may be beyond the list of arguments or in a terminal slice... argument, which // means we can't see it. -func argCanBeChecked(pass *analysis.Pass, call *ast.CallExpr, formatArg int, state *formatState) bool { - argNum := state.argNums[formatArg] - if argNum <= 0 { +func argCanBeChecked(pass *analysis.Pass, call *ast.CallExpr, argIndex, firstArg int, operation *fmtstr.Operation, name string) bool { + if argIndex <= 0 { // Shouldn't happen, so catch it with prejudice. - panic("negative arg num") + panic("negative argIndex") } - if argNum < len(call.Args)-1 { + if argIndex < len(call.Args)-1 { return true // Always OK. } if call.Ellipsis.IsValid() { return false // We just can't tell; there could be many more arguments. } - if argNum < len(call.Args) { + if argIndex < len(call.Args) { return true } // There are bad indexes in the format or there are fewer arguments than the format needs. // This is the argument number relative to the format: Printf("%s", "hi") will give 1 for the "hi". - arg := argNum - state.firstArg + 1 // People think of arguments as 1-indexed. - pass.ReportRangef(call, "%s format %s reads arg #%d, but call has %v", state.name, state.format, arg, count(len(call.Args)-state.firstArg, "arg")) + arg := argIndex - firstArg + 1 // People think of arguments as 1-indexed. + pass.ReportRangef(call, "%s format %s reads arg #%d, but call has %v", name, operation.Text, arg, count(len(call.Args)-firstArg, "arg")) return false } @@ -999,7 +858,7 @@ const ( ) // checkPrint checks a call to an unformatted print routine such as Println. -func checkPrint(pass *analysis.Pass, call *ast.CallExpr, fn *types.Func) { +func checkPrint(pass *analysis.Pass, call *ast.CallExpr, name string) { firstArg := 0 typ := pass.TypesInfo.Types[call.Fun].Type if typ == nil { @@ -1033,7 +892,7 @@ func checkPrint(pass *analysis.Pass, call *ast.CallExpr, fn *types.Func) { if sel, ok := call.Args[0].(*ast.SelectorExpr); ok { if x, ok := sel.X.(*ast.Ident); ok { if x.Name == "os" && strings.HasPrefix(sel.Sel.Name, "Std") { - pass.ReportRangef(call, "%s does not take io.Writer but has first arg %s", fn.FullName(), analysisinternal.Format(pass.Fset, call.Args[0])) + pass.ReportRangef(call, "%s does not take io.Writer but has first arg %s", name, analysisinternal.Format(pass.Fset, call.Args[0])) } } } @@ -1047,25 +906,25 @@ func checkPrint(pass *analysis.Pass, call *ast.CallExpr, fn *types.Func) { if strings.Contains(s, "%") { m := printFormatRE.FindStringSubmatch(s) if m != nil { - pass.ReportRangef(call, "%s call has possible Printf formatting directive %s", fn.FullName(), m[0]) + pass.ReportRangef(call, "%s call has possible Printf formatting directive %s", name, m[0]) } } } - if strings.HasSuffix(fn.Name(), "ln") { + if strings.HasSuffix(name, "ln") { // The last item, if a string, should not have a newline. arg = args[len(args)-1] if s, ok := stringConstantExpr(pass, arg); ok { if strings.HasSuffix(s, "\n") { - pass.ReportRangef(call, "%s arg list ends with redundant newline", fn.FullName()) + pass.ReportRangef(call, "%s arg list ends with redundant newline", name) } } } for _, arg := range args { if isFunctionValue(pass, arg) { - pass.ReportRangef(call, "%s arg %s is a func value, not called", fn.FullName(), analysisinternal.Format(pass.Fset, arg)) + pass.ReportRangef(call, "%s arg %s is a func value, not called", name, analysisinternal.Format(pass.Fset, arg)) } if methodName, ok := recursiveStringer(pass, arg); ok { - pass.ReportRangef(call, "%s arg %s causes recursive call to %s method", fn.FullName(), analysisinternal.Format(pass.Fset, arg), methodName) + pass.ReportRangef(call, "%s arg %s causes recursive call to %s method", name, analysisinternal.Format(pass.Fset, arg), methodName) } } } diff --git a/go/analysis/passes/printf/testdata/src/a/a.go b/go/analysis/passes/printf/testdata/src/a/a.go index 18b9e3be2b9..02ce425f8a3 100644 --- a/go/analysis/passes/printf/testdata/src/a/a.go +++ b/go/analysis/passes/printf/testdata/src/a/a.go @@ -212,8 +212,8 @@ func PrintfTests() { // Bad argument reorderings. Printf("%[xd", 3) // want `a.Printf format %\[xd is missing closing \]` Printf("%[x]d x", 3) // want `a.Printf format has invalid argument index \[x\]` - Printf("%[3]*s x", "hi", 2) // want `a.Printf format has invalid argument index \[3\]` - _ = fmt.Sprintf("%[3]d x", 2) // want `fmt.Sprintf format has invalid argument index \[3\]` + Printf("%[3]*s x", "hi", 2) // want `a.Printf format %\[3]\*s reads arg #3, but call has 2 args` + _ = fmt.Sprintf("%[3]d x", 2) // want `fmt.Sprintf format %\[3]d reads arg #3, but call has 1 arg` Printf("%[2]*.[1]*[3]d x", 2, "hi", 4) // want `a.Printf format %\[2]\*\.\[1\]\*\[3\]d uses non-int \x22hi\x22 as argument of \*` Printf("%[0]s x", "arg1") // want `a.Printf format has invalid argument index \[0\]` Printf("%[0]d x", 1) // want `a.Printf format has invalid argument index \[0\]` diff --git a/internal/fmtstr/main.go b/internal/fmtstr/main.go new file mode 100644 index 00000000000..7fcbfdbbf2c --- /dev/null +++ b/internal/fmtstr/main.go @@ -0,0 +1,94 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +//go:build ignore + +// The fmtstr command parses the format strings of calls to selected +// printf-like functions in the specified source file, and prints the +// formatting operations and their operands. +// +// It is intended only for debugging and is not a supported interface. +package main + +import ( + "flag" + "fmt" + "go/ast" + "go/parser" + "go/printer" + "go/token" + "log" + "strconv" + "strings" + + "golang.org/x/tools/internal/fmtstr" +) + +func main() { + log.SetPrefix("fmtstr: ") + log.SetFlags(0) + flag.Parse() + + fset := token.NewFileSet() + f, err := parser.ParseFile(fset, flag.Args()[0], nil, 0) + if err != nil { + log.Fatal(err) + } + + functions := map[string]int{ + "fmt.Errorf": 0, + "fmt.Fprintf": 1, + "fmt.Printf": 0, + "fmt.Sprintf": 0, + "log.Printf": 0, + } + + ast.Inspect(f, func(n ast.Node) bool { + if call, ok := n.(*ast.CallExpr); ok && !call.Ellipsis.IsValid() { + if sel, ok := call.Fun.(*ast.SelectorExpr); ok && is[*ast.Ident](sel.X) { + name := sel.X.(*ast.Ident).Name + "." + sel.Sel.Name // e.g. "fmt.Printf" + if fmtstrIndex, ok := functions[name]; ok && + len(call.Args) > fmtstrIndex { + // Is it a string literal? + if fmtstrArg, ok := call.Args[fmtstrIndex].(*ast.BasicLit); ok && + fmtstrArg.Kind == token.STRING { + // Have fmt.Printf("format", ...) + format, _ := strconv.Unquote(fmtstrArg.Value) + + ops, err := fmtstr.Parse(format, 0) + if err != nil { + log.Printf("%s: %v", fset.Position(fmtstrArg.Pos()), err) + return true + } + + fmt.Printf("%s: %s(%s, ...)\n", + fset.Position(fmtstrArg.Pos()), + name, + fmtstrArg.Value) + for _, op := range ops { + // TODO(adonovan): show more detail. + fmt.Printf("\t%q\t%v\n", + op.Text, + formatNode(fset, call.Args[op.Verb.ArgIndex])) + } + } + } + } + } + return true + }) +} + +func is[T any](x any) bool { + _, ok := x.(T) + return ok +} + +func formatNode(fset *token.FileSet, n ast.Node) string { + var buf strings.Builder + if err := printer.Fprint(&buf, fset, n); err != nil { + return "" + } + return buf.String() +} diff --git a/internal/fmtstr/parse.go b/internal/fmtstr/parse.go new file mode 100644 index 00000000000..9ab264f45d6 --- /dev/null +++ b/internal/fmtstr/parse.go @@ -0,0 +1,370 @@ +// Copyright 2024 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Package fmtstr defines a parser for format strings as used by [fmt.Printf]. +package fmtstr + +import ( + "fmt" + "strconv" + "strings" + "unicode/utf8" +) + +// Operation holds the parsed representation of a printf operation such as "%3.*[4]d". +// It is constructed by [Parse]. +type Operation struct { + Text string // full text of the operation, e.g. "%[2]*.3d" + Verb Verb // verb specifier, guaranteed to exist, e.g., 'd' in '%[1]d' + Range Range // the range of Text within the overall format string + Flags string // formatting flags, e.g. "-0" + Width Size // width specifier, e.g., '3' in '%3d' + Prec Size // precision specifier, e.g., '.4' in '%.4f' +} + +// Size describes an optional width or precision in a format operation. +// It may represent no value, a literal number, an asterisk, or an indexed asterisk. +type Size struct { + // At most one of these two fields is non-negative. + Fixed int // e.g. 4 from "%4d", otherwise -1 + Dynamic int // index of argument providing dynamic size (e.g. %*d or %[3]*d), otherwise -1 + + Index int // If the width or precision uses an indexed argument (e.g. 2 in %[2]*d), this is the index, otherwise -1 + Range Range // position of the size specifier within the operation +} + +// Verb represents the verb character of a format operation (e.g., 'd', 's', 'f'). +// It also includes positional information and any explicit argument indexing. +type Verb struct { + Verb rune + Range Range // positional range of the verb in the format string + Index int // index of an indexed argument, (e.g. 2 in %[2]d), otherwise -1 + ArgIndex int // argument index (0-based) associated with this verb, relative to CallExpr +} + +// byte offsets of format string +type Range struct { + Start, End int +} + +// Parse takes a format string and its index in the printf-like call, +// parses out all format operations, returns a slice of parsed +// [Operation] which describes flags, width, precision, verb, and argument indexing, +// or an error if parsing fails. +// +// All error messages are in predicate form ("call has a problem") +// so that they may be affixed into a subject ("log.Printf "). +// +// The flags will only be a subset of ['#', '0', '+', '-', ' ']. +// It does not perform any validation of verbs, nor the +// existence of corresponding arguments (obviously it can't). The provided format string may differ +// from the one in CallExpr, such as a concatenated string or a string +// referred to by the argument in the CallExpr. +func Parse(format string, idx int) ([]*Operation, error) { + if !strings.Contains(format, "%") { + return nil, fmt.Errorf("call has arguments but no formatting directives") + } + + firstArg := idx + 1 // Arguments are immediately after format string. + argNum := firstArg + var operations []*Operation + for i, w := 0, 0; i < len(format); i += w { + w = 1 + if format[i] != '%' { + continue + } + state, err := parseOperation(format[i:], firstArg, argNum) + if err != nil { + return nil, err + } + + state.operation.addOffset(i) + operations = append(operations, state.operation) + + w = len(state.operation.Text) + // Do not waste an argument for '%'. + if state.operation.Verb.Verb != '%' { + argNum = state.argNum + 1 + } + } + return operations, nil +} + +// Internal parsing state to operation. +type state struct { + operation *Operation + firstArg int // index of the first argument after the format string + argNum int // which argument we're expecting to format now + hasIndex bool // whether the argument is indexed + index int // the encountered index + indexPos int // the encountered index's offset + indexPending bool // whether we have an indexed argument that has not resolved + nbytes int // number of bytes of the format string consumed +} + +// parseOperation parses one format operation starting at the given substring `format`, +// which should begin with '%'. It returns a fully populated state or an error +// if the operation is malformed. The firstArg and argNum parameters help determine how +// arguments map to this operation. +// +// Parse sequence: '%' -> flags -> {[N]* or width} -> .{[N]* or precision} -> [N] -> verb. +func parseOperation(format string, firstArg, argNum int) (*state, error) { + state := &state{ + operation: &Operation{ + Text: format, + Width: Size{ + Fixed: -1, + Dynamic: -1, + Index: -1, + }, + Prec: Size{ + Fixed: -1, + Dynamic: -1, + Index: -1, + }, + }, + firstArg: firstArg, + argNum: argNum, + hasIndex: false, + index: 0, + indexPos: 0, + indexPending: false, + nbytes: len("%"), // There's guaranteed to be a percent sign. + } + // There may be flags. + state.parseFlags() + // There may be an index. + if err := state.parseIndex(); err != nil { + return nil, err + } + // There may be a width. + state.parseSize(Width) + // There may be a precision. + if err := state.parsePrecision(); err != nil { + return nil, err + } + // Now a verb, possibly prefixed by an index (which we may already have). + if !state.indexPending { + if err := state.parseIndex(); err != nil { + return nil, err + } + } + if state.nbytes == len(state.operation.Text) { + return nil, fmt.Errorf("format %s is missing verb at end of string", state.operation.Text) + } + verb, w := utf8.DecodeRuneInString(state.operation.Text[state.nbytes:]) + + // Ensure there must be a verb. + if state.indexPending { + state.operation.Verb = Verb{ + Verb: verb, + Range: Range{ + Start: state.indexPos, + End: state.nbytes + w, + }, + Index: state.index, + ArgIndex: state.argNum, + } + } else { + state.operation.Verb = Verb{ + Verb: verb, + Range: Range{ + Start: state.nbytes, + End: state.nbytes + w, + }, + Index: -1, + ArgIndex: state.argNum, + } + } + + state.nbytes += w + state.operation.Text = state.operation.Text[:state.nbytes] + return state, nil +} + +// addOffset adjusts the recorded positions in Verb, Width, Prec, and the +// operation's overall Range to be relative to the position in the full format string. +func (s *Operation) addOffset(parsedLen int) { + s.Verb.Range.Start += parsedLen + s.Verb.Range.End += parsedLen + + s.Range.Start = parsedLen + s.Range.End = s.Verb.Range.End + + // one of Fixed or Dynamic is non-negative means existence. + if s.Prec.Fixed != -1 || s.Prec.Dynamic != -1 { + s.Prec.Range.Start += parsedLen + s.Prec.Range.End += parsedLen + } + if s.Width.Fixed != -1 || s.Width.Dynamic != -1 { + s.Width.Range.Start += parsedLen + s.Width.Range.End += parsedLen + } +} + +// parseFlags accepts any printf flags. +func (s *state) parseFlags() { + s.operation.Flags = prefixOf(s.operation.Text[s.nbytes:], "#0+- ") + s.nbytes += len(s.operation.Flags) +} + +// prefixOf returns the prefix of s composed only of runes from the specified set. +func prefixOf(s, set string) string { + rest := strings.TrimLeft(s, set) + return s[:len(s)-len(rest)] +} + +// parseIndex parses an argument index of the form "[n]" that can appear +// in a printf operation (e.g., "%[2]d"). Returns an error if syntax is +// malformed or index is invalid. +func (s *state) parseIndex() error { + if s.nbytes == len(s.operation.Text) || s.operation.Text[s.nbytes] != '[' { + return nil + } + // Argument index present. + s.nbytes++ // skip '[' + start := s.nbytes + if num, ok := s.scanNum(); ok { + // Later consumed/stored by a '*' or verb. + s.index = num + s.indexPos = start - 1 + } + + ok := true + if s.nbytes == len(s.operation.Text) || s.nbytes == start || s.operation.Text[s.nbytes] != ']' { + ok = false // syntax error is either missing "]" or invalid index. + s.nbytes = strings.Index(s.operation.Text[start:], "]") + if s.nbytes < 0 { + return fmt.Errorf("format %s is missing closing ]", s.operation.Text) + } + s.nbytes = s.nbytes + start + } + arg32, err := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32) + if err != nil || !ok || arg32 <= 0 { + return fmt.Errorf("format has invalid argument index [%s]", s.operation.Text[start:s.nbytes]) + } + + s.nbytes++ // skip ']' + arg := int(arg32) + arg += s.firstArg - 1 // We want to zero-index the actual arguments. + s.argNum = arg + s.hasIndex = true + s.indexPending = true + return nil +} + +// scanNum advances through a decimal number if present, which represents a [Size] or [Index]. +func (s *state) scanNum() (int, bool) { + start := s.nbytes + for ; s.nbytes < len(s.operation.Text); s.nbytes++ { + c := s.operation.Text[s.nbytes] + if c < '0' || '9' < c { + if start < s.nbytes { + num, _ := strconv.ParseInt(s.operation.Text[start:s.nbytes], 10, 32) + return int(num), true + } else { + return 0, false + } + } + } + return 0, false +} + +type sizeType int + +const ( + Width sizeType = iota + Precision +) + +// parseSize parses a width or precision specifier. It handles literal numeric +// values (e.g., "%3d"), asterisk values (e.g., "%*d"), or indexed asterisk values (e.g., "%[2]*d"). +func (s *state) parseSize(kind sizeType) { + if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '*' { + s.nbytes++ + if s.indexPending { + // Absorb it. + s.indexPending = false + size := Size{ + Fixed: -1, + Dynamic: s.argNum, + Index: s.index, + Range: Range{ + Start: s.indexPos, + End: s.nbytes, + }, + } + switch kind { + case Width: + s.operation.Width = size + case Precision: + // Include the leading '.'. + size.Range.Start -= len(".") + s.operation.Prec = size + default: + panic(kind) + } + } else { + // Non-indexed asterisk: "%*d". + size := Size{ + Dynamic: s.argNum, + Index: -1, + Fixed: -1, + Range: Range{ + Start: s.nbytes - 1, + End: s.nbytes, + }, + } + switch kind { + case Width: + s.operation.Width = size + case Precision: + // For precision, include the '.' in the range. + size.Range.Start -= 1 + s.operation.Prec = size + default: + panic(kind) + } + } + s.argNum++ + } else { // Literal number, e.g. "%10d" + start := s.nbytes + if num, ok := s.scanNum(); ok { + size := Size{ + Fixed: num, + Index: -1, + Dynamic: -1, + Range: Range{ + Start: start, + End: s.nbytes, + }, + } + switch kind { + case Width: + s.operation.Width = size + case Precision: + // Include the leading '.'. + size.Range.Start -= 1 + s.operation.Prec = size + default: + panic(kind) + } + } + } +} + +// parsePrecision checks if there's a precision specified after a '.' character. +// If found, it may also parse an index or an asterisk. Returns an error if any index +// parsing fails. +func (s *state) parsePrecision() error { + // If there's a period, there may be a precision. + if s.nbytes < len(s.operation.Text) && s.operation.Text[s.nbytes] == '.' { + s.nbytes++ + if err := s.parseIndex(); err != nil { + return err + } + s.parseSize(Precision) + } + return nil +}