// main is the entry point of the devTests helper (app/scripts/cmd/dev).
//
// It expects exactly two arguments, an input directory and an output
// directory. Every file below the input directory that has a ".post"
// sibling (for example "build.go" next to "build.post.go") is diffed against
// that sibling, the unified diff is written to the output directory, and the
// original file is then rewritten with "pdx-N: " line prefixes.
//
// The process exits with status 1 on bad usage or on the first error.
func main() {
	if len(os.Args) != 3 {
		fmt.Println("Usage: go run dev.go <input_dir> <output_dir>")
		os.Exit(1)
	}

	inputDir := os.Args[1]
	outputDir := os.Args[2]

	// Reject a missing input, an unreadable input and a plain file alike;
	// checking only os.IsNotExist would let the other failures slip through
	// and surface later as a confusing walk error.
	info, err := os.Stat(inputDir)
	switch {
	case os.IsNotExist(err):
		fmt.Printf("Error: input directory %s does not exist\n", inputDir)
		os.Exit(1)
	case err != nil:
		fmt.Printf("Error reading input directory %s: %v\n", inputDir, err)
		os.Exit(1)
	case !info.IsDir():
		fmt.Printf("Error: %s is not a directory\n", inputDir)
		os.Exit(1)
	}

	// MkdirAll is a no-op when the directory already exists, so no prior
	// existence check is needed.
	if err := os.MkdirAll(outputDir, 0755); err != nil {
		fmt.Printf("Error creating output directory %s: %v\n", outputDir, err)
		os.Exit(1)
	}

	if err := processDir(inputDir, outputDir); err != nil {
		fmt.Printf("Error walking the path %s: %v\n", inputDir, err)
		os.Exit(1)
	}
}

// processDir walks inputDir and, for every regular entry whose name does not
// contain ".post." and that has a "<base>.post<ext>" sibling in the same
// directory, writes "<base>.diff.txt" into outputDir and then line-numbers
// the original file via modifyFile. It stops at the first error.
//
// Diff files are written flat into outputDir, so two files with the same
// base name in different sub-directories overwrite each other's diff.
func processDir(inputDir, outputDir string) error {
	return filepath.Walk(inputDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		// Directories carry no content and ".post." files are the right-hand
		// side of a diff, never the left.
		if info.IsDir() || strings.Contains(info.Name(), ".post.") {
			return nil
		}

		ext := filepath.Ext(info.Name())
		baseName := strings.TrimSuffix(info.Name(), ext)

		// Look for the counterpart next to the walked file. Joining with
		// inputDir instead would miss (or mis-pair) files in sub-directories.
		postFile := filepath.Join(filepath.Dir(path), baseName+".post"+ext)
		if _, err := os.Stat(postFile); err != nil {
			// No usable counterpart: nothing to diff, leave the file alone.
			return nil
		}

		outputFilePath := filepath.Join(outputDir, baseName+".diff.txt")
		if err := writeDiff(path, postFile, outputFilePath); err != nil {
			return err
		}

		// The file is rewritten only after its diff has been captured, so
		// the diff reflects the un-prefixed content.
		if err := modifyFile(path); err != nil {
			fmt.Printf("Error modifying file %s: %v\n", path, err)
			return err
		}
		return nil
	})
}

// writeDiff runs "diff -u path postFile" and stores its standard output in
// outputFilePath. diff's exit status 1 means "differences found" and is
// treated as success; any other failure is reported and returned.
func writeDiff(path, postFile, outputFilePath string) error {
	outfile, err := os.Create(outputFilePath)
	if err != nil {
		fmt.Printf("Error creating output file %s: %v\n", outputFilePath, err)
		return err
	}
	defer outfile.Close()

	cmd := exec.Command("diff", "-u", path, postFile)
	cmd.Stdout = outfile
	cmd.Stderr = os.Stderr

	err = cmd.Run()
	if err == nil {
		fmt.Printf("No differences found between %s and %s\n", path, postFile)
		return nil
	}
	if exitError, ok := err.(*exec.ExitError); ok && exitError.ExitCode() == 1 {
		fmt.Printf("Differences found and written to %s\n", outputFilePath)
		return nil
	}

	fmt.Printf("Error running diff command on %s and %s: %v\n", path, postFile, err)
	return err
}

// modifyFile rewrites filePath in place so that every line is prefixed with
// "pdx-N: ", where N is the 1-based line number. The result always ends with
// a newline, even when the input's last line did not.
//
// The new content is staged in "<filePath>.tmp" and renamed over the
// original once it is complete. On any failure the staging file is removed
// and the original is left untouched. The original's permission bits are
// carried over to the rewritten file.
func modifyFile(filePath string) (err error) {
	// Upper bound for a single line. bufio.Scanner's default limit is 64 KiB,
	// which minified or generated sources easily exceed.
	const maxLineBytes = 16 * 1024 * 1024

	inputFile, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer inputFile.Close()

	info, err := inputFile.Stat()
	if err != nil {
		return err
	}
	if info.IsDir() {
		// Fail before a stray "<dir>.tmp" file gets created.
		return fmt.Errorf("%s is a directory", filePath)
	}

	outputFilePath := filePath + ".tmp"
	outputFile, err := os.OpenFile(outputFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode().Perm())
	if err != nil {
		return err
	}
	// Clean up the staging file on every error path. The extra Close after
	// a successful explicit Close below is harmless.
	defer func() {
		if err != nil {
			outputFile.Close()
			os.Remove(outputFilePath)
		}
	}()

	scanner := bufio.NewScanner(inputFile)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)
	writer := bufio.NewWriter(outputFile)

	for lineNum := 1; scanner.Scan(); lineNum++ {
		if _, err = fmt.Fprintf(writer, "pdx-%d: %s\n", lineNum, scanner.Text()); err != nil {
			return err
		}
	}
	if err = scanner.Err(); err != nil {
		return err
	}
	if err = writer.Flush(); err != nil {
		return err
	}

	// Both handles are closed explicitly before the rename: some platforms
	// refuse to replace a file that is still open.
	if err = inputFile.Close(); err != nil {
		return err
	}
	if err = outputFile.Close(); err != nil {
		return err
	}

	// Replace the original file with the modified file.
	return os.Rename(outputFilePath, filePath)
}
// postFilePattern matches file names that contain a ".post." segment, such
// as "build.post.go". Those files hold the expected result of an eval and
// must stay free of "pdx-N: " prefixes. Compiled once instead of on every
// walked entry.
var postFilePattern = regexp.MustCompile(`\.post\.`)

// main line-numbers a single file or every eligible file below a directory.
//
// Usage: pdx_app <file_or_directory>
//
// The process exits with status 1 on bad usage or when processing fails, so
// callers such as make can detect the failure.
func main() {
	if len(os.Args) < 2 {
		fmt.Println("Usage: pdx_app <file_or_directory>")
		os.Exit(1)
	}

	target := os.Args[1]

	// Detect whether the argument is a directory or a single file.
	fileInfo, err := os.Stat(target)
	if err != nil {
		fmt.Printf("Error getting file info: %v\n", err)
		os.Exit(1)
	}

	if fileInfo.IsDir() {
		fmt.Print("Processing directory\n")
		if err := filepath.Walk(target, processFile); err != nil {
			fmt.Printf("Error walking the directory: %v\n", err)
			os.Exit(1)
		}
		return
	}

	fmt.Print("Processing file\n")
	if err := processFile(target, fileInfo, nil); err != nil {
		fmt.Printf("Error processing file %s: %v\n", target, err)
		os.Exit(1)
	}
}

// processFile is a filepath.WalkFunc that prefixes every line of path with
// "pdx-N: ".
//
// Directories and files whose name contains ".post." are skipped. A walk
// error passed in via err is returned unchanged, and a failure to rewrite
// the file is returned to the caller, which aborts the walk.
func processFile(path string, info os.FileInfo, err error) error {
	if err != nil {
		return err
	}

	// A directory has no lines to number, and ".post." files are the
	// reference outputs that the numbered files are compared against.
	if info.IsDir() || postFilePattern.MatchString(info.Name()) {
		return nil
	}

	if err := modifyFile(path); err != nil {
		return fmt.Errorf("modifying %s: %w", path, err)
	}
	return nil
}

// modifyFile rewrites filePath in place so that every line is prefixed with
// "pdx-N: ", where N is the 1-based line number. The result always ends with
// a newline, even when the input's last line did not.
//
// The new content is staged in "<filePath>.tmp" and renamed over the
// original once it is complete. On any failure the staging file is removed
// and the original is left untouched. The original's permission bits are
// carried over to the rewritten file.
func modifyFile(filePath string) (err error) {
	// Upper bound for a single line. bufio.Scanner's default limit is 64 KiB,
	// which minified or generated sources easily exceed.
	const maxLineBytes = 16 * 1024 * 1024

	inputFile, err := os.Open(filePath)
	if err != nil {
		return err
	}
	defer inputFile.Close()

	info, err := inputFile.Stat()
	if err != nil {
		return err
	}
	if info.IsDir() {
		// Fail before a stray "<dir>.tmp" file gets created.
		return fmt.Errorf("%s is a directory", filePath)
	}

	outputFilePath := filePath + ".tmp"
	outputFile, err := os.OpenFile(outputFilePath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode().Perm())
	if err != nil {
		return err
	}
	// Clean up the staging file on every error path. The extra Close after
	// a successful explicit Close below is harmless.
	defer func() {
		if err != nil {
			outputFile.Close()
			os.Remove(outputFilePath)
		}
	}()

	scanner := bufio.NewScanner(inputFile)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)
	writer := bufio.NewWriter(outputFile)

	for lineNum := 1; scanner.Scan(); lineNum++ {
		if _, err = fmt.Fprintf(writer, "pdx-%d: %s\n", lineNum, scanner.Text()); err != nil {
			return err
		}
	}
	if err = scanner.Err(); err != nil {
		return err
	}
	if err = writer.Flush(); err != nil {
		return err
	}

	// Both handles are closed explicitly before the rename: some platforms
	// refuse to replace a file that is still open.
	if err = inputFile.Close(); err != nil {
		return err
	}
	if err = outputFile.Close(); err != nil {
		return err
	}

	// Replace the original file with the modified file.
	return os.Rename(outputFilePath, filePath)
}
+ +```go +package database + +import "database/sql" + +var db *sql.DB + +func init() { + // Initialize database connection + var err error + for i := 0; i < 3; i++ { + db, err = sql.Open(" postgres", "user=postgres password=postgres dbname=postgres sslmode=disable") + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + err = db.Ping() + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + break + } +} + +func GetConnection() *sql.DB { + return db +} +``` + +### Subtask 2: Add logging for successful connection or failure in the `init` method. + +```go +package database + +import ( + "database/sql" + "log" +) + +var db *sql.DB + +func init() { + // Initialize database connection + + var err error + for i := 0; i < 3; i++ { + db, err = sql.Open("postgres", "user=postgres password=postgres dbname=postgres sslmode=disable") + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + err = db.Ping() + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + log.Println("Connected to database") + break + } +} + +func GetConnection() *sql.DB { + return db +} +``` + +### Subtask 3: Provide a new function `CloseConnection` to properly close the database connection. 
+ +```go +package database + +import ( + "database/sql" + "log" +) + +var db *sql.DB + +func init() { + // Initialize database connection + + var err error + for i := 0; i < 3; i++ { + db, err = sql.Open("postgres", "user=postgres password=postgres dbname=postgres sslmode=disable") + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + err = db.Ping() + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + log.Println("Connected to database") + break + } +} + +func GetConnection() *sql.DB { + return db +} + + +func CloseConnection() { + if db != nil { + db.Close() + log.Println("Database connection closed") + } +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/data_models.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/data_models.changes.md new file mode 100644 index 00000000..fac7df07 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/data_models.changes.md @@ -0,0 +1,74 @@ +### Subtask 1: Add a `CreatedAt` timestamp field to both `User` and `Post` structs. + +```go +package models + +type User struct { + ID string + Username string + Email string + CreatedAt time.Time +} + +type Post struct { + ID string + Content string + Author string + CreatedAt time.Time +} +``` + +### Subtask 2: Add a new struct `Comment` with fields `ID`, `Content`, `Author`, and `CreatedAt`. + +```go +package models + +type Comment struct { + ID string + Content string + Author string + CreatedAt time.Time +} + +type User struct { + ID string + Username string + Email string + CreatedAt time.Time +} + +type Post struct { + ID string + Content string + Author string + CreatedAt time.Time +} +``` + +### Subtask 3: Update `Post` to include a slice of `Comment` references. 
+ +```go +package models + +type Post struct { + ID string + Content string + Author string + CreatedAt time.Time + Comments []*Comment +} + +type Comment struct { + ID string + Content string + Author string + CreatedAt time.Time +} + +type User struct { + ID string + Username string + Email string + CreatedAt time.Time +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/network_config.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/network_config.changes.md new file mode 100644 index 00000000..47bb9932 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/network_config.changes.md @@ -0,0 +1,104 @@ +### Subtask 1: Replace the entire InitializeNetwork function to support dynamic network configurations based on environmental variables. + +```go +package network + +func InitializeNetwork() { + // Initialize default network settings + + // Check if environment variables are set + if ipAddress := os.Getenv("IP_ADDRESS"); ipAddress != "" { + ConfigureIP(ipAddress) + } + if subnetMask := os.Getenv("SUBNET_MASK"); subnetMask != "" { + ConfigureSubnet(subnetMask) + } + if gateway := os.Getenv("GATEWAY"); gateway != "" { + ConfigureGateway(gateway) + } + if dnsServer := os.Getenv("DNS_SERVER"); dnsServer != "" { + ConfigureDNS(dnsServer) + } + if domain := os.Getenv("DOMAIN"); domain != "" { + ConfigureDomain(domain) + } + if proxy := os.Getenv("PROXY"); proxy != "" { + ConfigureProxy(proxy) + } + if port := os.Getenv("PORT"); port != "" { + ConfigurePort(port) + } + // Verify network connectivity + VerifyConnectivity() +} + +func ConfigureIP(ipAddress string) { + // Code to configure IP address +} +``` + +### Subtask 2: Add a new function VerifyConnectivity that pings a set of predefined addresses to ensure network configuration is successful. 
+ + +```go +package network + +func InitializeNetwork() { + // Initialize default network settings + + // Check if environment variables are set + if ipAddress := os.Getenv("IP_ADDRESS"); ipAddress != "" { + ConfigureIP(ipAddress) + } + if subnetMask := os.Getenv("SUBNET_MASK"); subnetMask != "" { + ConfigureSubnet(subnetMask) + } + if gateway := os.Getenv("GATEWAY"); gateway != "" { + ConfigureGateway(gateway) + } + if dnsServer := os.Getenv("DNS_SERVER"); dnsServer != "" { + ConfigureDNS(dnsServer) + } + if domain := os.Getenv("DOMAIN"); domain != "" { + ConfigureDomain(domain) + } + if proxy := os.Getenv("PROXY"); proxy != "" { + ConfigureProxy(proxy) + } + if port := os.Getenv("PORT"); port != "" { + ConfigurePort(port) + } + // Verify network connectivity + VerifyConnectivity() +} + +func ConfigureIP(ipAddress string) { + // Code to configure IP address +} + +func VerifyConnectivity() { + // Ping a set of predefined addresses + + // Ping the default gateway + if err := ping(""); err != nil { + log.Fatal("Failed to ping the default gateway") + } + + // Ping the DNS server + if err := ping(""); err != nil { + log.Fatal("Failed to ping the DNS server") + } + + // Ping a public domain + if err := ping(""); err != nil { + log.Fatal("Failed to ping a public domain") + } + + // Ping a public IP address + if err := ping(""); err != nil { + log.Fatal("Failed to ping a public IP address") + } + + log.Println("Network configuration successful") +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/server_setup.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/server_setup.changes.md new file mode 100644 index 00000000..19be1b24 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/server_setup.changes.md @@ -0,0 +1,62 @@ +### Subtask 1: Update the `handler` function to log the request method and URI + +```go +package server + +import ( + "log" + "net/http" +) + +func init() { log.Println("Server starting...") } + +func handler(w 
http.ResponseWriter, r *http.Request) { + w.Write([]byte("Hello, world!")) + log.Printf("Request: %s %s", r.Method, r.RequestURI) +} +``` + +### Subtask 2: Add a new endpoint for `/api` that returns `API is live` + +```go +package server + +import ( + "log" + "net/http" +) + +func init() { log.Println("Server starting...") } + +func handler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("Hello, world!")) + log.Printf("Request: %s %s", r.Method, r.RequestURI) +} + +func apiHandler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("API is live")) +} +``` + +### Subtask 3: Ensure logging of server startup includes timestamp + +```go +package server + +import ( + "log" + "net/http" + "time" +) + +func init() { log.Printf("Server starting at %s...", time.Now()) } + +func handler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("Hello, world!")) + log.Printf("Request: %s %s", r.Method, r.RequestURI) +} + +func apiHandler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("API is live")) +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/sort_functions.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/sort_functions.changes.md new file mode 100644 index 00000000..7178782b --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/sort_functions.changes.md @@ -0,0 +1,86 @@ +### Subtask 1: Correct the sorting logic in `sortIntegers` function to actually sort the integers. + +```go +package utils + +func sortIntegers(input []int) { + // Sort the input array + for i := 0; i < len(input); i++ { + for j := i + 1; j < len(input); j++ { + if input[i] > input[j] { + input[i], input[j] = input[j], input[i] + } + } + } +} + +func main() { + values := []int{2, 3, 1, 4} + sortIntegers(values) + // Output should be a sorted array +} +``` + +### Subtask 2: Add a new function `printValues` to print the sorted array. 
+ +```go +package utils + +import "fmt" + +func sortIntegers(input *[]int) []int { + // Sort the input array + for i := 0; i < len(input); i++ { + for j := i + 1; j < len(input); j++ { + if input[i] > input[j] { + input[i], input[j] = input[j], input[i] + } + } + } +} + +func printValues(input []int) { + // Print the sorted array + for i := 0; i < len(input); i++ { + fmt.Println(input[i]) + } +} + +func main() { + values := []int{2, 3, 1, 4} + sortIntegers(values) + // Output should be a sorted array +} +``` + +### Subtask 3: Update `main` function to call `printValues` after sorting. + +```go +package utils + +import "fmt" + +func sortIntegers(input []int) { + // Sort the input array + for i := 0; i < len(input); i++ { + for j := i + 1; j < len(input); j++ { + if input[i] > input[j] { + input[i], input[j] = input[j], input[i] + } + } + } +} + +func printValues(input []int) { + // Print the sorted array + for i := 0; i < len(input); i++ { + fmt.Println(input[i]) + } +} + +func main() { + values := []int{2, 3, 1, 4} + sortIntegers(values) + printValues(values) +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/user_handler.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/user_handler.changes.md new file mode 100644 index 00000000..15311c82 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/user_handler.changes.md @@ -0,0 +1,155 @@ +### Subtask 1: Add input validation for `UserSignup` to check for valid email and password. 
+ +```go +package handlers + +import ( + "fmt" + "net/http" +) + +func UserSignup(w http.ResponseWriter, r *http.Request) { + // Handles user signup + email := r.FormValue("email") + password := r.FormValue("password") + + if email == "" || password == "" { + http.Error(w, "Email and password are required", http.StatusBadRequest) + return + } + + // Validate email format + if !isValidEmail(email) { + http.Error(w, "Invalid email format", http.StatusBadRequest) + return + } + + // Validate password length + if len(password) < 8 { + http.Error(w, "Password must be at least 8 characters long", http.StatusBadRequest) + return + } + + // Proceed with user signup +} + + func UserLogin(w http.ResponseWriter, r *http.Request) { + // Handles user login +} +``` + +### Subtask 2: Ensure `UserLogin` checks for correct user credentials and returns an error message on failure. + +```go +package handlers + +import ( + "fmt" + "net/http" +) + +func UserSignup(w http.ResponseWriter, r *http.Request) { + // Handles user signup + email := r.FormValue("email") + password := r.FormValue("password") + + if email == "" || password == "" { + http.Error(w, "Email and password are required", http.StatusBadRequest) + return + } + + // Validate email format + if !isValidEmail(email) { + http.Error(w, "Invalid email format", http.StatusBadRequest) + return + } + + // Validate password length + if len(password) < 8 { + http.Error(w, "Password must be at least 8 characters long", http.StatusBadRequest) + return + } + + // Proceed with user signup +} + + func UserLogin(w http.ResponseWriter, r *http.Request) { + + email := r.FormValue("email") + password := r.FormValue("password") + + // Check for correct user credentials + if email != "[email protected]" || password != "password123" { + http.Error(w, "Invalid email or password", http.StatusUnauthorized) + return + } + + // Proceed with user login +} +``` + +### Subtask 3: Introduce rate limiting on both `UserSignup` and `UserLogin` to prevent 
abuse. + +```go +package handlers + +import ( + "fmt" + "net/http" +) + +func UserSignup(w http.ResponseWriter, r *http.Request) { + // Handles user signup + email := r.FormValue("email") + password := r.FormValue("password") + + // Rate limit user signup + if !rateLimit("signup", email) { + http.Error(w, "Too many signup requests", http.StatusTooManyRequests) + return + } + + if email == "" || password == "" { + http.Error(w, "Email and password are required", http.StatusBadRequest) + return + } + + // Validate email format + if !isValidEmail(email) { + http.Error(w, "Invalid email format", http.StatusBadRequest) + return + } + + // Validate password length + if len(password) < 8 { + http.Error(w, "Password must be at least 8 characters long", http.StatusBadRequest) + return + } + + // Proceed with user signup +} + + func UserLogin(w http.ResponseWriter, r *http.Request) { + + email := r.FormValue("email") + password := r.FormValue("password") + + // Rate limit user signup + if !rateLimit("signup", email) { + http.Error(w, "Too many signup requests", http.StatusTooManyRequests) + return + } + + // Check for correct user credentials + if email != "[email protected]" || password != "password123" { + http.Error(w, "Invalid email or password", http.StatusUnauthorized) + return + } + // Proceed with user login +} + +func rateLimit(action, email string) bool { + // Implement rate limiting logic + return true +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/changes/utility_functions.changes.md b/test/evals/promptfoo-poc/build/assets/go/changes/utility_functions.changes.md new file mode 100644 index 00000000..47b28e34 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/changes/utility_functions.changes.md @@ -0,0 +1,59 @@ +### Subtask 1: Add a new function `Multiply(a int, b int) int` that returns the multiplication of two integers. 
+ +```go +package utils + +func Add(a int, b int) int { + return a + b +} + +func Subtract(a int, b int) int { + return a - b +} + +func Multiply(a int, b int) int { + return a * b +} +``` + +### Subtask 2: Modify `Add` function to log the inputs and output. + +```go +package utils + +import "log" + +func Add(a int, b int) int { + log.Printf("Adding %d and %d", a, b) + result := a + b + log.Printf("Result: %d", result) + return result +} + +func Subtract(a int, b int) int { + return a - b +} + +func Multiply(a int, b int) int { + return a * b +} +``` + +### Subtask 3: Remove the `Subtract` function. + +```go +package utils + +import "log" + +func Add(a int, b int) int { + log.Printf("Adding %d and %d", a, b) + result := a + b + log.Printf("Result: %d", result) + return result +} + +func Multiply(a int, b int) int { + return a * b +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/go/code/build.go b/test/evals/promptfoo-poc/build/assets/go/code/build.go new file mode 100644 index 00000000..f561ebad --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/build.go @@ -0,0 +1,91 @@ +pdx-1: package cmd +pdx-2: +pdx-3: import ( +pdx-4: "fmt" +pdx-5: "path/filepath" +pdx-6: "plandex/api" +pdx-7: "plandex/auth" +pdx-8: "plandex/lib" +pdx-9: "plandex/term" +pdx-10: +pdx-11: "github.com/plandex/plandex/shared" +pdx-12: "github.com/spf13/cobra" +pdx-13: ) +pdx-14: +pdx-15: var contextRmCmd = &cobra.Command{ +pdx-16: Use: "rm", +pdx-17: Aliases: []string{"remove", "unload"}, +pdx-18: Short: "Remove context", +pdx-19: Long: `Remove context by index, name, or glob.`, +pdx-20: Args: cobra.MinimumNArgs(1), +pdx-21: Run: contextRm, +pdx-22: } +pdx-23: +pdx-24: func contextRm(cmd *cobra.Command, args []string) { +pdx-25: auth.MustResolveAuthWithOrg() +pdx-26: lib.MustResolveProject() +pdx-27: +pdx-28: if lib.CurrentPlanId == "" { +pdx-29: fmt.Println("🤷‍♂️ No current plan") +pdx-30: return +pdx-31: } +pdx-32: +pdx-33: term.StartSpinner("") +pdx-34: contexts, err := 
api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch) +pdx-35: +pdx-36: if err != nil { +pdx-37: term.OutputErrorAndExit("Error retrieving context: %v", err) +pdx-38: } +pdx-39: +pdx-40: deleteIds := map[string]bool{} +pdx-41: +pdx-42: for i, context := range contexts { +pdx-43: for _, id := range args { +pdx-44: if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id { +pdx-45: deleteIds[context.Id] = true +pdx-46: break +pdx-47: } else if context.FilePath != "" { +pdx-48: // Check if id is a glob pattern +pdx-49: matched, err := filepath.Match(id, context.FilePath) +pdx-50: if err != nil { +pdx-51: term.OutputErrorAndExit("Error matching glob pattern: %v", err) +pdx-52: } +pdx-53: if matched { +pdx-54: deleteIds[context.Id] = true +pdx-55: break +pdx-56: } +pdx-57: +pdx-58: // Check if id is a parent directory +pdx-59: parentDir := context.FilePath +pdx-60: for parentDir != "." && parentDir != "/" && parentDir != "" { +pdx-61: if parentDir == id { +pdx-62: deleteIds[context.Id] = true +pdx-63: break +pdx-64: } +pdx-65: parentDir = filepath.Dir(parentDir) // Move up one directory +pdx-66: } +pdx-67: +pdx-68: } +pdx-69: } +pdx-70: } +pdx-71: +pdx-72: if len(deleteIds) > 0 { +pdx-73: res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{ +pdx-74: Ids: deleteIds, +pdx-75: }) +pdx-76: term.StopSpinner() +pdx-77: +pdx-78: if err != nil { +pdx-79: term.OutputErrorAndExit("Error deleting context: %v", err) +pdx-80: } +pdx-81: +pdx-82: fmt.Println("✅ " + res.Msg) +pdx-83: } else { +pdx-84: term.StopSpinner() +pdx-85: fmt.Println("🤷‍♂️ No context removed") +pdx-86: } +pdx-87: } +pdx-88: +pdx-89: func init() { +pdx-90: RootCmd.AddCommand(contextRmCmd) +pdx-91: } diff --git a/test/evals/promptfoo-poc/verify/assets/removal/post_build.go b/test/evals/promptfoo-poc/build/assets/go/code/build.post.go similarity index 100% rename from 
test/evals/promptfoo-poc/verify/assets/removal/post_build.go rename to test/evals/promptfoo-poc/build/assets/go/code/build.post.go diff --git a/test/evals/promptfoo-poc/build/assets/go/code/connection.go b/test/evals/promptfoo-poc/build/assets/go/code/connection.go new file mode 100644 index 00000000..897e99ae --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/connection.go @@ -0,0 +1,13 @@ +pdx-1: package database +pdx-2: +pdx-3: import "database/sql" +pdx-4: +pdx-5: var db *sql.DB +pdx-6: +pdx-7: func init() { +pdx-8: // Initialize database connection +pdx-9: } +pdx-10: +pdx-11: func GetConnection() *sql.DB { +pdx-12: return db +pdx-13: } diff --git a/test/evals/promptfoo-poc/build/assets/go/code/connection.post.go b/test/evals/promptfoo-poc/build/assets/go/code/connection.post.go new file mode 100644 index 00000000..59f526dc --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/connection.post.go @@ -0,0 +1,42 @@ +package database + +import ( + "database/sql" + "log" +) + +var db *sql.DB + +func init() { + // Initialize database connection + + var err error + for i := 0; i < 3; i++ { + db, err = sql.Open("postgres", "user=postgres password=postgres dbname=postgres sslmode=disable") + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + err = db.Ping() + if err != nil { + log.Println("Error connecting to database: ", err) + time.Sleep(5 * time.Second) + continue + } + log.Println("Connected to database") + break + } +} + +func GetConnection() *sql.DB { + return db +} + + +func CloseConnection() { + if db != nil { + db.Close() + log.Println("Database connection closed") + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/data_models.go b/test/evals/promptfoo-poc/build/assets/go/code/data_models.go new file mode 100644 index 00000000..97d67d80 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/data_models.go @@ -0,0 
+1,13 @@ +pdx-1: package models +pdx-2: +pdx-3: type User struct { +pdx-4: ID string +pdx-5: Username string +pdx-6: Email string +pdx-7: } +pdx-8: +pdx-9: type Post struct { +pdx-10: ID string +pdx-11: Content string +pdx-12: Author string +pdx-13: } diff --git a/test/evals/promptfoo-poc/build/assets/go/code/data_models.post.go b/test/evals/promptfoo-poc/build/assets/go/code/data_models.post.go new file mode 100644 index 00000000..1d256f19 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/data_models.post.go @@ -0,0 +1,23 @@ +package models + +type Post struct { + ID string + Content string + Author string + CreatedAt time.Time + Comments []*Comment +} + +type Comment struct { + ID string + Content string + Author string + CreatedAt time.Time +} + +type User struct { + ID string + Username string + Email string + CreatedAt time.Time +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/network_config.go b/test/evals/promptfoo-poc/build/assets/go/code/network_config.go new file mode 100644 index 00000000..6a82c376 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/network_config.go @@ -0,0 +1,9 @@ +pdx-1: package network +pdx-2: +pdx-3: func InitializeNetwork() { +pdx-4: // Initialize default network settings +pdx-5: } +pdx-6: +pdx-7: func ConfigureIP(ipAddress string) { +pdx-8: // Code to configure IP address +pdx-9: } diff --git a/test/evals/promptfoo-poc/build/assets/go/code/network_config.post.go b/test/evals/promptfoo-poc/build/assets/go/code/network_config.post.go new file mode 100644 index 00000000..0ea9fe14 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/network_config.post.go @@ -0,0 +1,60 @@ +package network + +func InitializeNetwork() { + // Initialize default network settings + + // Check if environment variables are set + if ipAddress := os.Getenv("IP_ADDRESS"); ipAddress != "" { + ConfigureIP(ipAddress) + } + if subnetMask := os.Getenv("SUBNET_MASK"); subnetMask != "" 
{ + ConfigureSubnet(subnetMask) + } + if gateway := os.Getenv("GATEWAY"); gateway != "" { + ConfigureGateway(gateway) + } + if dnsServer := os.Getenv("DNS_SERVER"); dnsServer != "" { + ConfigureDNS(dnsServer) + } + if domain := os.Getenv("DOMAIN"); domain != "" { + ConfigureDomain(domain) + } + if proxy := os.Getenv("PROXY"); proxy != "" { + ConfigureProxy(proxy) + } + if port := os.Getenv("PORT"); port != "" { + ConfigurePort(port) + } + // Verify network connectivity + VerifyConnectivity() +} + +func ConfigureIP(ipAddress string) { + // Code to configure IP address +} + +func VerifyConnectivity() { + // Ping a set of predefined addresses + + // Ping the default gateway + if err := ping(""); err != nil { + log.Fatal("Failed to ping the default gateway") + } + + // Ping the DNS server + if err := ping(""); err != nil { + log.Fatal("Failed to ping the DNS server") + } + + // Ping a public domain + if err := ping(""); err != nil { + log.Fatal("Failed to ping a public domain") + } + + // Ping a public IP address + if err := ping(""); err != nil { + log.Fatal("Failed to ping a public IP address") + } + + log.Println("Network configuration successful") +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/server_setup.go b/test/evals/promptfoo-poc/build/assets/go/code/server_setup.go new file mode 100644 index 00000000..aeae850e --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/server_setup.go @@ -0,0 +1,10 @@ +pdx-1: package server +pdx-2: import ( +pdx-3: "log" +pdx-4: "net/http" +pdx-5: ) +pdx-6: +pdx-7: func init() { log.Println("Server starting...") } +pdx-8: +pdx-9: func handler(w http.ResponseWriter, r *http.Request) { w.Write([]byte("Hello, world!")) } +pdx-10: \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/server_setup.post.go b/test/evals/promptfoo-poc/build/assets/go/code/server_setup.post.go new file mode 100644 index 00000000..b0c6e830 --- /dev/null +++ 
b/test/evals/promptfoo-poc/build/assets/go/code/server_setup.post.go @@ -0,0 +1,18 @@ +package server + +import ( + "log" + "net/http" + "time" +) + +func init() { log.Printf("Server starting at %s...", time.Now()) } + +func handler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("Hello, world!")) + log.Printf("Request: %s %s", r.Method, r.RequestURI) +} + +func apiHandler(w http.ResponseWriter, r *http.Request) { + w.Write([]byte("API is live")) +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.go b/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.go new file mode 100644 index 00000000..1f6c2839 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.go @@ -0,0 +1,11 @@ +pdx-1: package utils +pdx-2: +pdx-3: func sortIntegers(input []int) { +pdx-4: // Assume this is a basic sorting function +pdx-5: } +pdx-6: +pdx-7: func main() { +pdx-8: values := []int{2, 3, 1, 4} +pdx-9: sortIntegers(values) +pdx-10: // Output should be a sorted array +pdx-11: } diff --git a/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.post.go b/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.post.go new file mode 100644 index 00000000..a70b636c --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/sort_functions.post.go @@ -0,0 +1,27 @@ +package utils + +import "fmt" + +func sortIntegers(input []int) { + // Sort the input array + for i := 0; i < len(input); i++ { + for j := i + 1; j < len(input); j++ { + if input[i] > input[j] { + input[i], input[j] = input[j], input[i] + } + } + } +} + +func printValues(input []int) { + // Print the sorted array + for i := 0; i < len(input); i++ { + fmt.Println(input[i]) + } +} + +func main() { + values := []int{2, 3, 1, 4} + sortIntegers(values) + printValues(values) +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/user_handler.go 
b/test/evals/promptfoo-poc/build/assets/go/code/user_handler.go new file mode 100644 index 00000000..2735e04a --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/user_handler.go @@ -0,0 +1,14 @@ +pdx-1: package handlers +pdx-2: +pdx-3: import ( +pdx-4: "fmt" +pdx-5: "net/http" +pdx-6: ) +pdx-7: +pdx-8: func UserSignup(w http.ResponseWriter, r *http.Request) { +pdx-9: // Handles user signup +pdx-10: } +pdx-11: +pdx-12: func UserLogin(w http.ResponseWriter, r *http.Request) { +pdx-13: // Handles user login +pdx-14: } diff --git a/test/evals/promptfoo-poc/build/assets/go/code/user_handler.post.go b/test/evals/promptfoo-poc/build/assets/go/code/user_handler.post.go new file mode 100644 index 00000000..01828b86 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/user_handler.post.go @@ -0,0 +1,61 @@ +package handlers + +import ( + "fmt" + "net/http" +) + +func UserSignup(w http.ResponseWriter, r *http.Request) { + // Handles user signup + email := r.FormValue("email") + password := r.FormValue("password") + + // Rate limit user signup + if !rateLimit("signup", email) { + http.Error(w, "Too many signup requests", http.StatusTooManyRequests) + return + } + + if email == "" || password == "" { + http.Error(w, "Email and password are required", http.StatusBadRequest) + return + } + + // Validate email format + if !isValidEmail(email) { + http.Error(w, "Invalid email format", http.StatusBadRequest) + return + } + + // Validate password length + if len(password) < 8 { + http.Error(w, "Password must be at least 8 characters long", http.StatusBadRequest) + return + } + + // Proceed with user signup +} + + func UserLogin(w http.ResponseWriter, r *http.Request) { + + email := r.FormValue("email") + password := r.FormValue("password") + + // Rate limit user signup + if !rateLimit("signup", email) { + http.Error(w, "Too many signup requests", http.StatusTooManyRequests) + return + } + + // Check for correct user credentials + if email != "[email 
protected]" || password != "password123" { + http.Error(w, "Invalid email or password", http.StatusUnauthorized) + return + } + // Proceed with user login +} + +func rateLimit(action, email string) bool { + // Implement rate limiting logic + return true +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.go b/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.go new file mode 100644 index 00000000..51101508 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.go @@ -0,0 +1,10 @@ +pdx-1: package utils +pdx-2: +pdx-3: func Add(a int, b int) int { +pdx-4: return a + b +pdx-5: } +pdx-6: +pdx-7: func Subtract(a int, b int) int { +pdx-8: return a - b +pdx-9: } +pdx-10: \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.post.go b/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.post.go new file mode 100644 index 00000000..98e3c518 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/go/code/utility_functions.post.go @@ -0,0 +1,14 @@ +package utils + +import "log" + +func Add(a int, b int) int { + log.Printf("Adding %d and %d", a, b) + result := a + b + log.Printf("Result: %d", result) + return result +} + +func Multiply(a int, b int) int { + return a * b +} diff --git a/test/evals/promptfoo-poc/build/assets/js/changes/user_management.changes.md b/test/evals/promptfoo-poc/build/assets/js/changes/user_management.changes.md new file mode 100644 index 00000000..312cab93 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/js/changes/user_management.changes.md @@ -0,0 +1,80 @@ +### Subtask 1: Update the addUser method to prevent adding users with duplicate usernames. 
+ +```js +class UserManager { + constructor(users) { + this.users = users; + } + + addUser(user) { + if (this.findUser(user.username)) { + console.error(`User with username ${user.username} already exists`); + return; + } + this.users.push(user); + } + + findUser(username) { + return this.users.find(u => u.username === username); + } +} +``` + +### Subtask 2: Modify the findUser method to log an error if the user is not found. + +```js +class UserManager { + constructor(users) { + this.users = users; + } + + addUser(user) { + if (this.findUser(user.username)) { + console.error(`User with username ${user.username} already exists`); + return; + } + this.users.push(user); + } + + findUser(username) { + const user = this.users.find(u => u.username === username); + if (!user) { + console.error(`User with username ${username} not found`); + } + } +} +``` + +### Subtask 3: Add a method to remove a user by username. + +```js +class UserManager { + constructor(users) { + this.users = users; + } + + addUser(user) { + if (this.findUser(user.username)) { + console.error(`User with username ${user.username} already exists`); + return; + } + this.users.push(user); + } + + findUser(username) { + const user = this.users.find(u => u.username === username); + if (!user) { + console.error(`User with username ${username} not found`); + } + } + + removeUser(username) { + const index = this.users.findIndex(u => u.username === username); + if (index !== -1) { + this.users.splice(index, 1); + } else { + console.error(`User with username ${username} not found`); + } + } +} +``` diff --git a/test/evals/promptfoo-poc/build/assets/js/code/user_management.js b/test/evals/promptfoo-poc/build/assets/js/code/user_management.js new file mode 100644 index 00000000..c00852a1 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/js/code/user_management.js @@ -0,0 +1,13 @@ +pdx-1: class UserManager { +pdx-2: constructor(users) { +pdx-3: this.users = users; +pdx-4: } +pdx-5: +pdx-6: addUser(user) { 
+pdx-7: this.users.push(user); +pdx-8: } +pdx-9: +pdx-10: findUser(username) { +pdx-11: return this.users.find(u => u.username === username); +pdx-12: } +pdx-13: } diff --git a/test/evals/promptfoo-poc/build/assets/js/code/user_management.post.js b/test/evals/promptfoo-poc/build/assets/js/code/user_management.post.js new file mode 100644 index 00000000..53872c1c --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/js/code/user_management.post.js @@ -0,0 +1,29 @@ +class UserManager { + constructor(users) { + this.users = users; + } + + addUser(user) { + if (this.findUser(user.username)) { + console.error(`User with username ${user.username} already exists`); + return; + } + this.users.push(user); + } + + findUser(username) { + const user = this.users.find(u => u.username === username); + if (!user) { + console.error(`User with username ${username} not found`); + } + } + + removeUser(username) { + const index = this.users.findIndex(u => u.username === username); + if (index !== -1) { + this.users.splice(index, 1); + } else { + console.error(`User with username ${username} not found`); + } + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/py/changes/data_processor.changes.md b/test/evals/promptfoo-poc/build/assets/py/changes/data_processor.changes.md new file mode 100644 index 00000000..8e9d3a9a --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/py/changes/data_processor.changes.md @@ -0,0 +1,38 @@ +### Subtask 1: Add error handling to processData to catch and log exceptions + +```py +import sys + +# Main processing function +def processData(input): + try: + # TODO: Implement data processing + pass + except Exception as e: + print(f"Error processing data: {e}") + +if __name__ == '__main__': + inputData = sys.argv[1] + processData(inputData) +``` + +### Subtask 2: Update the # TODO comment with a more descriptive message. Add a new function to validate input data before processing. 
+ +```py +import sys + +# Main processing function +def processData(input): + try: + # TODO: This location is where the data processing logic should be implemented + pass + except Exception as e: + print(f"Error processing data: {e}") + +def validateInput(input): + pass + +if __name__ == '__main__': + inputData = sys.argv[1] + processData(inputData) +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/py/code/data_processor.post.py b/test/evals/promptfoo-poc/build/assets/py/code/data_processor.post.py new file mode 100644 index 00000000..f5f51e13 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/py/code/data_processor.post.py @@ -0,0 +1,16 @@ +import sys + +# Main processing function +def processData(input): + try: + # TODO: This location is where the data processing logic should be implemented + pass + except Exception as e: + print(f"Error processing data: {e}") + +def validateInput(input): + pass + +if __name__ == '__main__': + inputData = sys.argv[1] + processData(inputData) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/assets/py/code/data_processor.py b/test/evals/promptfoo-poc/build/assets/py/code/data_processor.py new file mode 100644 index 00000000..84f04413 --- /dev/null +++ b/test/evals/promptfoo-poc/build/assets/py/code/data_processor.py @@ -0,0 +1,10 @@ +pdx-1: import sys +pdx-2: +pdx-3: # Main processing function +pdx-4: def processData(input): +pdx-5: # TODO: Implement data processing +pdx-6: pass +pdx-7: +pdx-8: if __name__ == '__main__': +pdx-9: inputData = sys.argv[1] +pdx-10: processData(inputData) diff --git a/test/evals/promptfoo-poc/build/promptfooconfig.yaml b/test/evals/promptfoo-poc/build/promptfooconfig.yaml index 423c31a1..d7b9250b 100644 --- a/test/evals/promptfoo-poc/build/promptfooconfig.yaml +++ b/test/evals/promptfoo-poc/build/promptfooconfig.yaml @@ -4,4 +4,8 @@ prompts: - file://build.prompt.txt providers: - file://build.provider.yml +defaultTest: + assert: + - type: 
is-json + - type: is-valid-openai-tools-call tests: tests/*.test.yml diff --git a/test/evals/promptfoo-poc/build/tests/build.test.go.yml b/test/evals/promptfoo-poc/build/tests/build.test.go.yml new file mode 100644 index 00000000..9233fdc0 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/build.test.go.yml @@ -0,0 +1,15 @@ +- description: "Check Build with Line numbers" + vars: + preBuildState: file://assets/go/code/build.go + changes: file://assets/go/changes/build.changes.md + filePath: parse.go + postBuildState: file://assets/go/code/build.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("func parseRange(arg string) ([]int, error)") + ) diff --git a/test/evals/promptfoo-poc/build/tests/build.test.yml b/test/evals/promptfoo-poc/build/tests/build.test.yml deleted file mode 100644 index 7e76ab57..00000000 --- a/test/evals/promptfoo-poc/build/tests/build.test.yml +++ /dev/null @@ -1,19 +0,0 @@ -- description: "Check Build with Line numbers" - vars: - preBuildState: file://assets/shared/pre_build.go - changes: file://assets/build/changes.md - filePath: parse.go - postBuildState: file://assets/build/post_build.go - assert: - - type: is-json - - type: is-valid-openai-tools-call - - type: javascript - value: | - var args = JSON.parse(output[0].function.arguments) - return ( - args.changes.length > 0 && - args.changes.some( - change => change.hasChange && - change.new.includes("var contextRmCmd = &cobra.Command{") - ) - ) diff --git a/test/evals/promptfoo-poc/build/tests/connection.test.go.yml b/test/evals/promptfoo-poc/build/tests/connection.test.go.yml new file mode 100644 index 00000000..8d6fcf8b --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/connection.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify database connection handling"
+ vars: + preBuildState: file://assets/go/code/connection.go + changes: file://assets/go/changes/connection.changes.md + filePath: connection.go + postBuildState: file://assets/go/code/connection.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("func CloseConnection()") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/tests/data_models.test.go.yml b/test/evals/promptfoo-poc/build/tests/data_models.test.go.yml new file mode 100644 index 00000000..72ec6eb6 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/data_models.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify data models updates and relationships" + vars: + preBuildState: file://assets/go/code/data_models.go + changes: file://assets/go/changes/data_models.changes.md + filePath: data_models.go + postBuildState: file://assets/go/code/data_models.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("Comments []*Comment") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/tests/data_processor.test.py.yml b/test/evals/promptfoo-poc/build/tests/data_processor.test.py.yml new file mode 100644 index 00000000..043cb68d --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/data_processor.test.py.yml @@ -0,0 +1,15 @@ +- description: "Verify data_processor function implementation" + vars: + preBuildState: file://assets/py/code/data_processor.py + changes: file://assets/py/changes/data_processor.changes.md + filePath: data_processor.py + postBuildState: file://assets/py/code/data_processor.post.py + assert: + - type: is-json + -
type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("def validateInput(input):") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/tests/network_config.test.go.yml b/test/evals/promptfoo-poc/build/tests/network_config.test.go.yml new file mode 100644 index 00000000..5c972680 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/network_config.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify network configuration and connectivity" + vars: + preBuildState: file://assets/go/code/network_config.go + changes: file://assets/go/changes/network_config.changes.md + filePath: network_config.go + postBuildState: file://assets/go/code/network_config.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("VerifyConnectivity()") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/tests/server_setup.test.go.yml b/test/evals/promptfoo-poc/build/tests/server_setup.test.go.yml new file mode 100644 index 00000000..cf0d5dc1 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/server_setup.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify server setup and logging" + vars: + preBuildState: file://assets/go/code/server_setup.go + changes: file://assets/go/changes/server_setup.changes.md + filePath: server_setup.go + postBuildState: file://assets/go/code/server_setup.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange &&
change.new.includes("log.Printf(\"Server starting at %s...") + ) diff --git a/test/evals/promptfoo-poc/build/tests/sort_functions.test.go.yml b/test/evals/promptfoo-poc/build/tests/sort_functions.test.go.yml new file mode 100644 index 00000000..0ce2aa74 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/sort_functions.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify sorting logic and output" + vars: + preBuildState: file://assets/go/code/sort_functions.go + changes: file://assets/go/changes/sort_functions.changes.md + filePath: sort_functions.go + postBuildState: file://assets/go/code/sort_functions.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("printValues(sortedValues)") + ) diff --git a/test/evals/promptfoo-poc/build/tests/user_handler.test.go.yml b/test/evals/promptfoo-poc/build/tests/user_handler.test.go.yml new file mode 100644 index 00000000..244d055b --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/user_handler.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify user handler functions for signup, login, and validation" + vars: + preBuildState: file://assets/go/code/user_handler.go + changes: file://assets/go/changes/user_handler.changes.md + filePath: user_handler.go + postBuildState: file://assets/go/code/user_handler.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("rateLimit(\"signup\", email)") + ) diff --git a/test/evals/promptfoo-poc/build/tests/user_management.test.js.yml b/test/evals/promptfoo-poc/build/tests/user_management.test.js.yml new file mode 100644 index 00000000..1d9f0a87 --- /dev/null
+++ b/test/evals/promptfoo-poc/build/tests/user_management.test.js.yml @@ -0,0 +1,15 @@ +- description: "Verify UserManager class implementation" + vars: + preBuildState: file://assets/js/code/user_management.js + changes: file://assets/js/changes/user_management.changes.md + filePath: user_management.js + postBuildState: file://assets/js/code/user_management.post.js + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("removeUser(username)") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/build/tests/utility_functions.test.go.yml b/test/evals/promptfoo-poc/build/tests/utility_functions.test.go.yml new file mode 100644 index 00000000..492a9e08 --- /dev/null +++ b/test/evals/promptfoo-poc/build/tests/utility_functions.test.go.yml @@ -0,0 +1,15 @@ +- description: "Verify utility functions implementation" + vars: + preBuildState: file://assets/go/code/utility_functions.go + changes: file://assets/go/changes/utility_functions.changes.md + filePath: utility_functions.go + postBuildState: file://assets/go/code/utility_functions.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("Multiply(a int, b int) int") + ) diff --git a/test/evals/promptfoo-poc/evals.md b/test/evals/promptfoo-poc/evals.md index 0ad19fa9..9baa2d8d 100644 --- a/test/evals/promptfoo-poc/evals.md +++ b/test/evals/promptfoo-poc/evals.md @@ -19,6 +19,48 @@ Evals for plandex. ## Types of Evals +🧑‍💻: Yes, promptfoo offers various methods for evaluating prompts beyond simple assertions. These include heuristics, language model-assisted metrics, and other strategies.
Here are some additional methods you can use: + +### Deterministic Evaluation Metrics + +1. **equals**: Checks if the output matches exactly. +2. **contains**: Checks if the output contains a substring. +3. **icontains**: Case-insensitive substring check. +4. **regex**: Matches the output against a regular expression. +5. **starts-with**: Checks if the output starts with a specified string. +6. **is-json**: Validates if the output is valid JSON. +7. **contains-json**: Checks if the output contains valid JSON. +8. **is-sql**: Validates if the output is valid SQL. +9. **contains-sql**: Checks if the output contains valid SQL. +10. **javascript**: Uses a JavaScript function to validate the output. +11. **python**: Uses a Python function to validate the output. +12. **webhook**: Uses a webhook that returns `{pass: true}`. +13. **rouge-n**: Rouge-N score is above a given threshold. +14. **levenshtein**: Levenshtein distance is below a threshold. +15. **latency**: Latency is below a threshold (milliseconds). +16. **perplexity**: Perplexity is below a threshold. +17. **cost**: Cost is below a threshold (for models with cost info such as GPT). +18. **is-valid-openai-function-call**: Ensures the function call matches the function's JSON schema. +19. **is-valid-openai-tools-call**: Ensures all tool calls match the tools JSON schema. + +### Model-assisted Evaluation Metrics + +1. **similar**: Uses embeddings and cosine similarity to check if the output is similar to a given text. +2. **classifier**: Runs the LLM output through a classifier. +3. **llm-rubric**: Uses a Language Model to grade the output based on a rubric. +4. **answer-relevance**: Ensures the LLM output is related to the original query. +5. **context-faithfulness**: Ensures the LLM output uses the context accurately. +6. **context-recall**: Ensures the ground truth appears in the context. +7. **context-relevance**: Ensures the context is relevant to the original query. +8. 
**factuality**: Ensures the LLM output adheres to the given facts. +9. **model-graded-closedqa**: Uses a Closed QA method to evaluate the LLM output. +10. **moderation**: Ensures the outputs are safe. +11. **select-best**: Compares multiple outputs for a test case and picks the best one. + +These methods provide a comprehensive set of tools to evaluate the quality, accuracy, and relevance of LLM outputs. You can combine these methods to create robust and thorough evaluations for your use cases. + +Would you like to implement any of these advanced evaluation methods for your test cases? If so, please specify which methods you are interested in, and I can help you integrate them. + --- ### Build Prompts Evaluations @@ -88,3 +130,5 @@ Evals for plandex. - Ensure the schema is well-documented with clear explanations of each parameter and return type. 7. **Security Considerations** - Evaluate the schema for potential security vulnerabilities or risks. + + diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/add.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/add.changes.md new file mode 100644 index 00000000..00684a08 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/add.changes.md @@ -0,0 +1,10 @@ +### Subtask 1: Add error checking to the add function. Convert it to handle float types instead of int. + +```go +func add(a float64, b float64) float64 { + if a == nil || b == nil { + return 0, errors.New("Invalid input") + } + return a + b +} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/arithmetic.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/arithmetic.changes.md new file mode 100644 index 00000000..2ad7f4f6 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/arithmetic.changes.md @@ -0,0 +1,37 @@ +### Subtask 1: Fix the newline error in the Product print statement. 
+ + +```go +package main + +import ( + "fmt" +) + +func main() { + var a, b int = 4, 2 + fmt.Println("Sum:", a+b) + fmt.Println("Difference:", a-b) + fmt.Println("Product:", a*b) + fmt.Println("Quotient:", a/b) +} +``` + +### Subtask 2: Add a modulus operation print statement. + +```go +package main + +import ( + "fmt" +) + +func main() { + var a, b int = 4, 2 + fmt.Println("Sum:", a+b) + fmt.Println("Difference:", a-b) + fmt.Println("Product:", a*b) + fmt.Println("Quotient:", a/b) + fmt.Println("Modulus:", a%b) +} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/calculatesum.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/calculatesum.changes.md new file mode 100644 index 00000000..5a55e378 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/calculatesum.changes.md @@ -0,0 +1,16 @@ +### Subtask 1: Add error checking for nil slice. Update the function to return 0 and an error if 'numbers' is nil. + +```go +func calculateSum(numbers []int) int { + sum := 0 + + if numbers == nil { + return 0, errors.New("Invalid input") + } + + for _, num := range numbers { + sum += num + } + return sum +} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/globalconfig.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/globalconfig.changes.md new file mode 100644 index 00000000..9f328cf2 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/globalconfig.changes.md @@ -0,0 +1,29 @@ +### Subtask 1: Update the default 'path' in GlobalConfig to '/new/default/path'. + +```go +var GlobalConfig = map[string]string{ + "path": "/new/default/path", + "timeout": "30s", +} + +func init() { + // Load additional settings + GlobalConfig["debug"] = "false" +} +``` + +### Subtask 2: Add a comment above GlobalConfig declaration indicating it holds configuration settings. + +```go +// GlobalConfig holds configuration settings. 
+var GlobalConfig = map[string]string{ + "path": "/new/default/path", + "timeout": "30s", +} + +func init() { + // Load additional settings + GlobalConfig["debug"] = "false" + GlobalConfig["path"] = "/new/default/path" // duplicated update +} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/hello.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/hello.changes.md new file mode 100644 index 00000000..64c34a79 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/hello.changes.md @@ -0,0 +1,7 @@ +### Subtask 1: Remove the import statement for fmt and the Println statement. + +```go +package main + +func main() {} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/loop.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/loop.changes.md new file mode 100644 index 00000000..66a20541 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/loop.changes.md @@ -0,0 +1,13 @@ +### Subtask 1: Increase loop count from 5 to 10. Add a comment inside the loop. + +```go +package main + +import "fmt" + +func main() { + for i := 0; i < 10; i++ { + fmt.Println(i) + } +} +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/main.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/main.changes.md new file mode 100644 index 00000000..269faeb6 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/main.changes.md @@ -0,0 +1,19 @@ +### Subtask 1: Remove the import block as it's no longer needed. + +```go +package main + +func main() { + fmt.Println("Hello, world!") +} +``` + +### Subtask 2: Change the message to 'Goodbye, world!'. 
+ +```go +package main + +func main() { + fmt.Println("Goodbye, world!") +} +``` diff --git a/test/evals/promptfoo-poc/fix/assets/go/changes/print.changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/print.changes.md new file mode 100644 index 00000000..111f2e8f --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/changes/print.changes.md @@ -0,0 +1,26 @@ +### Subtask 1: Replace 'Hello, World!' with 'Goodbye, World!'. + +```go +package main + +import "fmt" + +func main() { + fmt.Println("Goodbye, world!") + fmt.Println("Hello, World!") // this should have been removed +} +``` + +### Subtask 2: Add a comment above the print statement: '// print farewell message' + +```go +package main + +import "fmt" + +func main() { + // print farewell message + fmt.Println("Goodbye, world!") + fmt.Println("Hello, World!") // this should have been removed +} +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/removal/changes.md b/test/evals/promptfoo-poc/fix/assets/go/changes/removal.changes.md similarity index 100% rename from test/evals/promptfoo-poc/fix/assets/removal/changes.md rename to test/evals/promptfoo-poc/fix/assets/go/changes/removal.changes.md diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/add.go b/test/evals/promptfoo-poc/fix/assets/go/code/add.go new file mode 100644 index 00000000..a874f360 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/add.go @@ -0,0 +1,3 @@ +func add(a int, b int) int { + return a + b +} diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/add.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/add.post.go new file mode 100644 index 00000000..1c5ec9bc --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/add.post.go @@ -0,0 +1,6 @@ +pdx-1: func add(a float64, b float64) float64 { +pdx-2: if a == nil || b == nil { +pdx-3: return 0, errors.New("Invalid input") +pdx-4: } +pdx-5: return a + b +pdx-6: } diff --git 
a/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.go b/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.go new file mode 100644 index 00000000..86934a1c --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.go @@ -0,0 +1,17 @@ +// Simple Go program to perform arithmetic operations + +package main + +import ( + "fmt" +) + +func main() { + var a, b int = 4, 2 + fmt.Println("Sum:", a+b) + fmt.Println("Difference:", a-b) + fmt.Println("Product: + +", a*b) + fmt.Println("Quotient:", a/b) +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.post.go new file mode 100644 index 00000000..c5f924aa --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/arithmetic.post.go @@ -0,0 +1,17 @@ +pdx-1: // Simple Go program to perform arithmetic operations +pdx-2: +pdx-3: package main +pdx-4: +pdx-5: import ( +pdx-6: "fmt" +pdx-7: ) +pdx-8: +pdx-9: func main() { +pdx-10: var a, b int = 4, 2 +pdx-11: fmt.Println("Sum:", a+b) +pdx-12: fmt.Println("Difference:", a-b) +pdx-13: fmt.Println("Product:", a*b)// Incorrectly removed newline and added comment of the divisor operation +pdx-14: fmt.Println("Quotient:", a/b) +pdx-15: // Need to add modulus operation +pdx-16: fmt.Println("Modulus:", a%b) +pdx-17: } diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.go b/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.go new file mode 100644 index 00000000..73a640ef --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.go @@ -0,0 +1,7 @@ +func calculateSum(numbers []int) int { + sum := 0 + for _, num := range numbers { + sum += num + } + return sum +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.post.go new file mode 100644 index 00000000..9133362d --- /dev/null +++ 
b/test/evals/promptfoo-poc/fix/assets/go/code/calculatesum.post.go @@ -0,0 +1,10 @@ +pdx-1: func calculateSum(numbers []int) int { +pdx-2: if numbers == nil { +pdx-3: return 0, errors.New("numbers cannot be nil") +pdx-4: } +pdx-5: sum := 0 +pdx-6: for _, num := range numbers { +pdx-7: sum += num +pdx-8: } +pdx-9: return sum +pdx-10: } diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.go b/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.go new file mode 100644 index 00000000..25ad6785 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.go @@ -0,0 +1,9 @@ +var GlobalConfig = map[string]string{ + "path": "/default/path", + "timeout": "30s", +} + +func init() { + // Load additional settings + GlobalConfig["debug"] = "false" +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.post.go new file mode 100644 index 00000000..1b8a1adb --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/globalconfig.post.go @@ -0,0 +1,11 @@ +pdx-1: var GlobalConfig = map[string]string{ +pdx-2: // Configuration settings +pdx-3: "path": "/new/default/path", +pdx-4: "timeout": "30s", +pdx-5: } +pdx-6: +pdx-7: func init() { +pdx-8: // GlobalConfig holds configuration settings. 
+pdx-9: GlobalConfig["debug"] = "false" +pdx-10: GlobalConfig["path"] = "/new/default/path" // duplicated update +pdx-11: } diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/hello.go b/test/evals/promptfoo-poc/fix/assets/go/code/hello.go new file mode 100644 index 00000000..f9c9daec --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/hello.go @@ -0,0 +1,9 @@ +package main + +import ( + "fmt" +) + +func main() { + fmt.Println("Hello, world!") +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/hello.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/hello.post.go new file mode 100644 index 00000000..d1bfe48a --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/hello.post.go @@ -0,0 +1,3 @@ +pdx-1: package main +pdx-2: +pdx-3: func main() {} diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/loop.go b/test/evals/promptfoo-poc/fix/assets/go/code/loop.go new file mode 100644 index 00000000..e639ef73 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/loop.go @@ -0,0 +1,9 @@ +package main + +import "fmt" + +func main() { + for i := 0; i < 5; i++ { + fmt.Println(i) + } +} diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/loop.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/loop.post.go new file mode 100644 index 00000000..f3618a2c --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/loop.post.go @@ -0,0 +1,10 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: for i := 0; i < 10; i++ { +pdx-7: fmt.Println(i) +pdx-8: // loop iteration +pdx-9: } +pdx-10: } diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/main.go b/test/evals/promptfoo-poc/fix/assets/go/code/main.go new file mode 100644 index 00000000..8f618807 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/main.go @@ -0,0 +1,9 @@ +package main + +import ( + "fmt" +) + +func main() { + fmt.Println("Hello, world!") +} diff --git 
a/test/evals/promptfoo-poc/fix/assets/go/code/main.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/main.post.go new file mode 100644 index 00000000..995ad9fe --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/main.post.go @@ -0,0 +1,6 @@ +pdx-1: package main +pdx-2: +pdx-3: +pdx-4: func main() { +pdx-5: fmt.Println("Goodbye, world!") +pdx-6: } diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/print.go b/test/evals/promptfoo-poc/fix/assets/go/code/print.go new file mode 100644 index 00000000..d9e8b4e1 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/print.go @@ -0,0 +1,7 @@ +package main + +import "fmt" + +func main() { + fmt.Println("Hello, World!") +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/code/print.post.go b/test/evals/promptfoo-poc/fix/assets/go/code/print.post.go new file mode 100644 index 00000000..35b33ac4 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/code/print.post.go @@ -0,0 +1,9 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: // print farewell message +pdx-7: fmt.Println("Goodbye, World!") +pdx-8: fmt.Println("Hello, World!") // this should have been removed +pdx-9: } diff --git a/test/evals/promptfoo-poc/build/assets/shared/pre_build.go b/test/evals/promptfoo-poc/fix/assets/go/code/removal.go similarity index 100% rename from test/evals/promptfoo-poc/build/assets/shared/pre_build.go rename to test/evals/promptfoo-poc/fix/assets/go/code/removal.go diff --git a/test/evals/promptfoo-poc/build/assets/build/post_build.go b/test/evals/promptfoo-poc/fix/assets/go/code/removal.post.go similarity index 99% rename from test/evals/promptfoo-poc/build/assets/build/post_build.go rename to test/evals/promptfoo-poc/fix/assets/go/code/removal.post.go index 4f855553..b02e6cff 100644 --- a/test/evals/promptfoo-poc/build/assets/build/post_build.go +++ b/test/evals/promptfoo-poc/fix/assets/go/code/removal.post.go @@ -120,4 +120,4 @@ 
pdx-119: pdx-120: func init() { pdx-121: RootCmd.AddCommand(contextRmCmd) pdx-122: } -pdx-123: \ No newline at end of file +pdx-123: diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/add.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/add.problems.md new file mode 100644 index 00000000..59f430d4 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/add.problems.md @@ -0,0 +1 @@ +The function signature was correctly updated to use float64, but the error checking introduced does not compile in Go because 'nil' comparison is used instead of a proper validation, and the return type does not match the new expected return of a tuple containing float64 and error. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/arithmetic.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/arithmetic.problems.md new file mode 100644 index 00000000..0b165ed4 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/arithmetic.problems.md @@ -0,0 +1,4 @@ +1. The import statement 'fmt' is duplicated and incorrectly positioned within the file. +2. The newline error in the Product print statement was incorrectly fixed by removing the newline instead of correctly adjusting it. +3. A comment indicating the need for a modulus operation was added but the operation itself was not implemented. +4. An incorrect comment was added at the end of the Product line. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/calculatesum.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/calculatesum.problems.md new file mode 100644 index 00000000..56a9e53b --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/calculatesum.problems.md @@ -0,0 +1 @@ +The return type of the function was not updated to accommodate the error return. 
The function signature remains returning only an int but attempts to return two values (int and error) when the input slice is nil. This introduces a mismatch in return types that prevents the code from compiling. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/globalconfig.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/globalconfig.problems.md new file mode 100644 index 00000000..44c6f80b --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/globalconfig.problems.md @@ -0,0 +1 @@ +The 'path' setting in GlobalConfig was correctly updated but then duplicated incorrectly in the init function. This results in redundant code which does not align with the update instructions. Also, the comment '// Configuration settings' is correctly added. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/hello.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/hello.problems.md new file mode 100644 index 00000000..e75fea76 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/hello.problems.md @@ -0,0 +1 @@ +The incorrect update removed the import statement and the Println statement but left an extra newline within the main function, which is not necessarily an error but could be considered stylistically unnecessary or non-idiomatic Go code. Additionally, comments indicating where code was removed were not added, which might be required for clarity in some projects. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/loop.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/loop.problems.md new file mode 100644 index 00000000..d11b74ec --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/loop.problems.md @@ -0,0 +1 @@ +The loop counter was increased correctly, but a syntax error was introduced due to an extra space before the closing brace. 
Additionally, the added comment is too vague and does not follow best practices. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/main.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/main.problems.md new file mode 100644 index 00000000..0dbceef3 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/main.problems.md @@ -0,0 +1,3 @@ +The file has several issues: +1. The 'fmt' package import was removed but the 'fmt.Println' statement requires it, leading to an undefined package error. +2. The 'fmt.Println' statement intended to update the message to 'Goodbye, world!' but removed the required package to run successfully. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/go/problems/print.problems.md b/test/evals/promptfoo-poc/fix/assets/go/problems/print.problems.md new file mode 100644 index 00000000..908a9bb8 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/go/problems/print.problems.md @@ -0,0 +1 @@ +The update introduced a duplicate print statement of 'Hello, World!' that should have been replaced. The correct update should only contain one print statement 'Goodbye, World!', and the comment '// print farewell message' is correctly added but the old print statement was not removed. 
\ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/removal/problems.txt b/test/evals/promptfoo-poc/fix/assets/go/problems/removal.problems.md similarity index 100% rename from test/evals/promptfoo-poc/fix/assets/removal/problems.txt rename to test/evals/promptfoo-poc/fix/assets/go/problems/removal.problems.md diff --git a/test/evals/promptfoo-poc/fix/assets/html/changes/index.changes.md b/test/evals/promptfoo-poc/fix/assets/html/changes/index.changes.md new file mode 100644 index 00000000..566eb406 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/html/changes/index.changes.md @@ -0,0 +1,32 @@ +### Subtask 1: Update the title of the HTML page to 'Updated Title'. + +```html + + + + Updated Title + + + +

This is a Heading

+

This is a paragraph.

+ + + +``` + +### Subtask 2: Add a new paragraph below the existing one. + +```html + + + + Updated Title + + +

This is a Heading

+

This is a paragraph.

+

New paragraph without ending tag. + + +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/html/code/index.html b/test/evals/promptfoo-poc/fix/assets/html/code/index.html new file mode 100644 index 00000000..ef85d2f8 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/html/code/index.html @@ -0,0 +1,14 @@ + + + + + + Page Title + + + +

This is a Heading

+

This is a paragraph.

+ + + \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/html/code/index.post.html b/test/evals/promptfoo-poc/fix/assets/html/code/index.post.html new file mode 100644 index 00000000..ed324ea7 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/html/code/index.post.html @@ -0,0 +1,13 @@ +pdx-1: +pdx-2: +pdx-3: +pdx-4: Updated Title +pdx-5: +pdx-6: +pdx-7: +pdx-8:

This is a Heading

+pdx-9:

This is a paragraph.

+pdx-10:

New paragraph without ending tag. +pdx-11: +pdx-12: +pdx-13: diff --git a/test/evals/promptfoo-poc/fix/assets/html/problems/index.problems.md b/test/evals/promptfoo-poc/fix/assets/html/problems/index.problems.md new file mode 100644 index 00000000..5c99d685 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/html/problems/index.problems.md @@ -0,0 +1,3 @@ +1. The title of the HTML page was updated correctly. +2. A new paragraph was added but without the correct ending tag, leading to potential display issues in web browsers. +3. No other syntax errors or missing declarations are present. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/java/changes/HelloWorld.changes.md b/test/evals/promptfoo-poc/fix/assets/java/changes/HelloWorld.changes.md new file mode 100644 index 00000000..4c99694b --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/java/changes/HelloWorld.changes.md @@ -0,0 +1,13 @@ +### Subtask 1: Add a comment header above the class that describes the purpose of the HelloWorld class. + +```java +/* + * This class demonstrates printing "Hello, World!" to the console. + */ +public class HelloWorld { + // existing init code... + public static void main(String[] args) { + System.out.println("Hello, World!"); + } + // rest of your class... 
+``` diff --git a/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.java b/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.java new file mode 100644 index 00000000..943b67e4 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.java @@ -0,0 +1,5 @@ +public class HelloWorld { + public static void main(String[] args) { + System.out.println("Hello, World!"); + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.post.java b/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.post.java new file mode 100644 index 00000000..a08b84b7 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/java/code/HelloWorld.post.java @@ -0,0 +1,9 @@ +pdx-1: /* +pdx-2: * This class demonstrates printing "Hello, World!" to the console. +pdx-3: */ +pdx-4: public class HelloWorld { +pdx-5: // existing init code... +pdx-6: public static void main(String[] args) { +pdx-7: System.out.println("Hello, World!"); +pdx-8: } +pdx-9: // rest of your class... diff --git a/test/evals/promptfoo-poc/fix/assets/java/problems/HelloWorld.problems.md b/test/evals/promptfoo-poc/fix/assets/java/problems/HelloWorld.problems.md new file mode 100644 index 00000000..2bce86c7 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/java/problems/HelloWorld.problems.md @@ -0,0 +1 @@ +The updated file incorrectly includes placeholder comments like '// existing init code...' and '// rest of your class...'. These comments are placeholders not replaced by actual code from the original file. Such comments could mislead developers into thinking there's additional initial setup code or more content within the class that's not present. These comments should be replaced with the exact code they're referring to if any, or removed if unnecessary.
\ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/changes/age.changes.md b/test/evals/promptfoo-poc/fix/assets/js/changes/age.changes.md new file mode 100644 index 00000000..1c891194 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/changes/age.changes.md @@ -0,0 +1,26 @@ +### Subtask 1: Add error handling to ensure 'birthYear' is not in the future. + +```js +function calculateAge(birthYear) { + const currentYear = new Date().getFullYear(); + if (birthYear > currentYear) { + console.error("Birth year cannot be in the future."); + } +} + +console.log(calculateAge(1990)); +``` + +### Subtask 2: Ensure the function prints a message instead of returning the age. + +```js +function calculateAge(birthYear) { + const currentYear = new Date().getFullYear(); + if (birthYear > currentYear) { + console.error("Birth year cannot be in the future."); + } + console.log(currentYear - birthYear); +} + +console.log(calculateAge(1990)); +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/changes/api.changes.md b/test/evals/promptfoo-poc/fix/assets/js/changes/api.changes.md new file mode 100644 index 00000000..0d698724 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/changes/api.changes.md @@ -0,0 +1,16 @@ +### Subtask 1: Update the fetchData function to include error handling for failed HTTP requests. + +```js +function fetchData() { + const apiURL = 'https://api.example.com/data'; + fetch(apiURL) + .then((response) => { + if (!response.ok) { + throw new Error('Network response was not ok'); + } + return response.json(); + }) + .then((data) => console.log(data)) + .catch((error) => console.error('Error:', error)); + // rest of the function... 
+``` diff --git a/test/evals/promptfoo-poc/fix/assets/js/changes/main.changes.md b/test/evals/promptfoo-poc/fix/assets/js/changes/main.changes.md new file mode 100644 index 00000000..79660aa5 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/changes/main.changes.md @@ -0,0 +1,15 @@ +### Subtask 1: Add a method 'connect()' that establishes a database connection. + +```js +class DatabaseConnector { + // connect() method code here +} +``` + +### Subtask 2: Ensure the connection uses the newest security standards. + +```js +class DatabaseConnector { + // connect() method code here +} +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/age.js b/test/evals/promptfoo-poc/fix/assets/js/code/age.js new file mode 100644 index 00000000..eed226e6 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/age.js @@ -0,0 +1,8 @@ +//JavaScript function to calculate age + +function calculateAge(birthYear) { + const currentYear = new Date().getFullYear(); + return currentYear - birthYear; +} + +console.log(calculateAge(1990)); \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/age.post.js b/test/evals/promptfoo-poc/fix/assets/js/code/age.post.js new file mode 100644 index 00000000..22b5a7a0 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/age.post.js @@ -0,0 +1,9 @@ +pdx-1: function calculateAge(birthYear) { +pdx-2: const currentYear = new Date().getFullYear(); +pdx-3: if (birthYear > currentYear) { +pdx-4: console.error("Birth year cannot be in the future."); +pdx-5: } +pdx-6: console.log(currentYear - birthYear); +pdx-7: } +pdx-8: +pdx-9: console.log(calculateAge(1990)); diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/api.js b/test/evals/promptfoo-poc/fix/assets/js/code/api.js new file mode 100644 index 00000000..a13f43ef --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/api.js @@ -0,0 +1,7 @@ +function fetchData() { + const apiURL = 
'https://api.example.com/data'; + fetch(apiURL) + .then((response) => response.json()) + .then((data) => console.log(data)) + .catch((error) => console.error('Error:', error)); +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/api.post.js b/test/evals/promptfoo-poc/fix/assets/js/code/api.post.js new file mode 100644 index 00000000..72168b8b --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/api.post.js @@ -0,0 +1,12 @@ +pdx-1: function fetchData() { +pdx-2: const apiURL = 'https://api.example.com/data'; +pdx-3: fetch(apiURL) +pdx-4: .then((response) => { +pdx-5: if (!response.ok) { +pdx-6: throw new Error('Network response was not ok'); +pdx-7: } +pdx-8: return response.json(); +pdx-9: }) +pdx-10: .then((data) => console.log(data)) +pdx-11: .catch((error) => console.error('Error:', error)); +pdx-12: // rest of the function... diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/main.js b/test/evals/promptfoo-poc/fix/assets/js/code/main.js new file mode 100644 index 00000000..d9ae1346 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/main.js @@ -0,0 +1,3 @@ +class DatabaseConnector { + // Existing methods +} diff --git a/test/evals/promptfoo-poc/fix/assets/js/code/main.post.js b/test/evals/promptfoo-poc/fix/assets/js/code/main.post.js new file mode 100644 index 00000000..bb124fa4 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/code/main.post.js @@ -0,0 +1,3 @@ +pdx-1: class DatabaseConnector { +pdx-2: // connect() method code here +pdx-3: } diff --git a/test/evals/promptfoo-poc/fix/assets/js/problems/age.problems.md b/test/evals/promptfoo-poc/fix/assets/js/problems/age.problems.md new file mode 100644 index 00000000..14290629 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/problems/age.problems.md @@ -0,0 +1,3 @@ +1. The error handling logic for 'birthYear' being in the future has been added, but it does not prevent the rest of the function from executing. +2. 
The console.log statement to print the age is always executed, even when the birthYear is in the future. +3. The original function's return statement was replaced with a console.log, but the final console.log outside the function incorrectly remains, intending to demonstrate the function's usage. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/problems/api.problems.md b/test/evals/promptfoo-poc/fix/assets/js/problems/api.problems.md new file mode 100644 index 00000000..9dc9f239 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/problems/api.problems.md @@ -0,0 +1 @@ +The updated file incorrectly forgets a closing bracket and includes a comment '// rest of the function...' which suggests more code follows, but it's actually a placeholder not replaced by existing code. It leaves the impression that part of the function might be missing or that the comment is inaccurately placed. Additionally, the added error handling within the fetch statement improves the function, but the comment issue could lead to misunderstandings about the code's completeness. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/js/problems/main.problems.md b/test/evals/promptfoo-poc/fix/assets/js/problems/main.problems.md new file mode 100644 index 00000000..cbd576d3 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/js/problems/main.problems.md @@ -0,0 +1 @@ +The update introduces a placeholder comment for the 'connect()' method instead of actual method implementation. It lacks the actual code needed to establish a database connection, especially with the latest security standards as intended. 
\ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/py/changes/dataprocessor.changes.md b/test/evals/promptfoo-poc/fix/assets/py/changes/dataprocessor.changes.md new file mode 100644 index 00000000..49da4bfd --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/changes/dataprocessor.changes.md @@ -0,0 +1,12 @@ +### Subtask 1: Implement the processData function to print each data item. Remove the helperFunction as it is no longer needed. + +```py +class DataProcessor: + def processData(self, data): + # Implementation goes here + pass + + def helperFunction(self): + # Helper implementation + pass +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/py/changes/main.changes.md b/test/evals/promptfoo-poc/fix/assets/py/changes/main.changes.md new file mode 100644 index 00000000..7a1ff31f --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/changes/main.changes.md @@ -0,0 +1,19 @@ +### Subtask 1: Add error handling for parameter validation. + +```py +def initialize(params): + if not params: + return False + # Setup parameters based on input + return True +``` + +### Subtask 2: Replace the placeholder comment with actual initialization code that sets up parameters based on input. 
+ +```py +def initialize(params): + if not params: + return False + # Setup parameters based on input + return True +``` \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.post.py b/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.post.py new file mode 100644 index 00000000..7bde9851 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.post.py @@ -0,0 +1,7 @@ +pdx-1: class DataProcessor: +pdx-2: def processData(self, data): +pdx-3: for item in data: +pdx-4: print(item) +pdx-5: def helperFunction(self): +pdx-6: # This should have been removed +pdx-7: pass \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.py b/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.py new file mode 100644 index 00000000..715c09d2 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/code/dataprocessor.py @@ -0,0 +1,8 @@ +class DataProcessor: + def processData(self, data): + # Implementation goes here + pass + + def helperFunction(self): + # Helper implementation + pass diff --git a/test/evals/promptfoo-poc/fix/assets/py/code/main.post.py b/test/evals/promptfoo-poc/fix/assets/py/code/main.post.py new file mode 100644 index 00000000..7ecc0f13 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/code/main.post.py @@ -0,0 +1,5 @@ +pdx-1: def initialize(params): +pdx-2: if not params: +pdx-3: return False +pdx-4: # Setup parameters based on input +pdx-5: return True diff --git a/test/evals/promptfoo-poc/fix/assets/py/code/main.py b/test/evals/promptfoo-poc/fix/assets/py/code/main.py new file mode 100644 index 00000000..ff1e7859 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/code/main.py @@ -0,0 +1,3 @@ +def initialize(params): + # existing init code + return True diff --git a/test/evals/promptfoo-poc/fix/assets/py/problems/dataprocessor.problems.md b/test/evals/promptfoo-poc/fix/assets/py/problems/dataprocessor.problems.md new file 
mode 100644 index 00000000..27601b6b --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/problems/dataprocessor.problems.md @@ -0,0 +1 @@ +The processData function was implemented correctly, but the helperFunction was not removed as required. Additionally, there's a missing newline before the definition of helperFunction which is violating PEP 8 style guidelines. \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/py/problems/main.problems.md b/test/evals/promptfoo-poc/fix/assets/py/problems/main.problems.md new file mode 100644 index 00000000..ae72186f --- /dev/null +++ b/test/evals/promptfoo-poc/fix/assets/py/problems/main.problems.md @@ -0,0 +1 @@ +The code attempts to improve by adding parameter validation and replacing a placeholder comment with a vague description of actual code that should initialize parameters. However, it doesn't include the actual necessary code for parameter setup, and the comment used as a placeholder does not represent the original initialization code. 
\ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/assets/removal/post_build.go b/test/evals/promptfoo-poc/fix/assets/removal/post_build.go deleted file mode 100644 index 4f855553..00000000 --- a/test/evals/promptfoo-poc/fix/assets/removal/post_build.go +++ /dev/null @@ -1,123 +0,0 @@ -pdx-1: package cmd -pdx-2: -pdx-3: import ( -pdx-4: "fmt" -pdx-5: "path/filepath" -pdx-6: "plandex/api" -pdx-7: "plandex/auth" -pdx-8: "plandex/lib" -pdx-9: "plandex/term" -pdx-10: "strconv" -pdx-11: "strings" -pdx-12: -pdx-13: "github.com/plandex/plandex/shared" -pdx-14: "github.com/spf13/cobra" -pdx-15: ) -pdx-16: -pdx-17: func parseRange(arg string) ([]int, error) { -pdx-18: var indices []int -pdx-19: parts := strings.Split(arg, "-") -pdx-20: if len(parts) == 2 { -pdx-21: start, err := strconv.Atoi(parts[0]) -pdx-22: if err != nil { -pdx-23: return nil, err -pdx-24: } -pdx-25: end, err := strconv.Atoi(parts[1]) -pdx-26: if err != nil { -pdx-27: return nil, err -pdx-28: } -pdx-29: for i := start; i <= end; i++ { -pdx-30: indices = append(indices, i) -pdx-31: } -pdx-32: } else { -pdx-33: index, err := strconv.Atoi(arg) -pdx-34: if err != nil { -pdx-35: return nil, err -pdx-36: } -pdx-37: indices = append(indices, index) -pdx-38: } -pdx-39: return indices, nil -pdx-40: } -pdx-41: -pdx-42: func contextRm(cmd *cobra.Command, args []string) { -pdx-43: auth.MustResolveAuthWithOrg() -pdx-44: lib.MustResolveProject() -pdx-45: -pdx-46: if lib.CurrentPlanId == "" { -pdx-47: fmt.Println("🤷‍♂️ No current plan") -pdx-48: return -pdx-49: } -pdx-50: -pdx-51: term.StartSpinner("") -pdx-52: contexts, err := api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch) -pdx-53: -pdx-54: if err != nil { -pdx-55: term.OutputErrorAndExit("Error retrieving context: %v", err) -pdx-56: } -pdx-57: -pdx-58: deleteIds := map[string]bool{} -pdx-59: -pdx-60: for _, arg := range args { -pdx-61: indices, err := parseRange(arg) -pdx-62: if err != nil { -pdx-63: term.OutputErrorAndExit("Error 
parsing range: %v", err) -pdx-64: } -pdx-65: -pdx-66: for _, index := range indices { -pdx-67: if index > 0 && index <= len(contexts) { -pdx-68: context := contexts[index-1] -pdx-69: deleteIds[context.Id] = true -pdx-70: } -pdx-71: } -pdx-72: } -pdx-73: -pdx-74: for i, context := range contexts { -pdx-75: for _, id := range args { -pdx-76: if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id { -pdx-77: deleteIds[context.Id] = true -pdx-78: break -pdx-79: } else if context.FilePath != "" { -pdx-80: // Check if id is a glob pattern -pdx-81: matched, err := filepath.Match(id, context.FilePath) -pdx-82: if err != nil { -pdx-83: term.OutputErrorAndExit("Error matching glob pattern: %v", err) -pdx-84: } -pdx-85: if matched { -pdx-86: deleteIds[context.Id] = true -pdx-87: break -pdx-88: } -pdx-89: -pdx-90: // Check if id is a parent directory -pdx-91: parentDir := context.FilePath -pdx-92: for parentDir != "." && parentDir != "/" && parentDir != "" { -pdx-93: if parentDir == id { -pdx-94: deleteIds[context.Id] = true -pdx-95: break -pdx-96: } -pdx-97: parentDir = filepath.Dir(parentDir) // Move up one directory -pdx-98: } -pdx-99: } -pdx-100: } -pdx-101: } -pdx-102: -pdx-103: if len(deleteIds) > 0 { -pdx-104: res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{ -pdx-105: Ids: deleteIds, -pdx-106: }) -pdx-107: term.StopSpinner() -pdx-108: -pdx-109: if err != nil { -pdx-110: term.OutputErrorAndExit("Error deleting context: %v", err) -pdx-111: } -pdx-112: -pdx-113: fmt.Println("✅ " + res.Msg) -pdx-114: } else { -pdx-115: term.StopSpinner() -pdx-116: fmt.Println("🤷‍♂️ No context removed") -pdx-117: } -pdx-118: } -pdx-119: -pdx-120: func init() { -pdx-121: RootCmd.AddCommand(contextRmCmd) -pdx-122: } -pdx-123: \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml index 45d6870e..4eea945e 
100644 --- a/test/evals/promptfoo-poc/fix/promptfooconfig.yaml +++ b/test/evals/promptfoo-poc/fix/promptfooconfig.yaml @@ -7,5 +7,9 @@ prompts: providers: - file://fix.provider.yml +defaultTest: + assert: + - type: is-json + - type: is-valid-openai-tools-call tests: tests/*.test.yml diff --git a/test/evals/promptfoo-poc/fix/tests/add.go.test.yml b/test/evals/promptfoo-poc/fix/tests/add.go.test.yml new file mode 100644 index 00000000..64a5ba1c --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/add.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Add functionality" + vars: + preBuildState: file://assets/go/code/add.go + changes: file://assets/go/changes/add.changes.md + problems: file://assets/go/problems/add.problems.md + postBuildState: file://assets/go/code/add.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("if a == nil || b == nil {") + ) + ) diff --git a/test/evals/promptfoo-poc/fix/tests/age.js.test.yml b/test/evals/promptfoo-poc/fix/tests/age.js.test.yml new file mode 100644 index 00000000..95905bab --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/age.js.test.yml @@ -0,0 +1,13 @@ +- description: "Verify calculateAge function implementation" + vars: + preBuildState: file://assets/js/code/age.js + changes: file://assets/js/changes/age.changes.md + problems: file://assets/js/problems/age.problems.md + postBuildState: file://assets/js/code/age.post.js + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange && change.new.includes("console.error(\"Birth year cannot be in the future.\")") + ) diff --git a/test/evals/promptfoo-poc/fix/tests/api.js.test.yml
b/test/evals/promptfoo-poc/fix/tests/api.js.test.yml new file mode 100644 index 00000000..3fcab313 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/api.js.test.yml @@ -0,0 +1,13 @@ +- description: "Verify fetchData function implementation" + vars: + preBuildState: file://assets/js/code/api.js + changes: file://assets/js/changes/api.changes.md + problems: file://assets/js/problems/api.problems.md + postBuildState: file://assets/js/code/api.post.js + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange && change.new.includes("if (!response.ok)") + ) diff --git a/test/evals/promptfoo-poc/fix/tests/arithmetic.go.test.yml b/test/evals/promptfoo-poc/fix/tests/arithmetic.go.test.yml new file mode 100644 index 00000000..e2269393 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/arithmetic.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Arithmetic functionality" + vars: + preBuildState: file://assets/go/code/arithmetic.go + changes: file://assets/go/changes/arithmetic.changes.md + problems: file://assets/go/problems/arithmetic.problems.md + postBuildState: file://assets/go/code/arithmetic.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("fmt.Println(\"Modulus:\", a%b)") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/calculatesum.go.test.yml b/test/evals/promptfoo-poc/fix/tests/calculatesum.go.test.yml new file mode 100644 index 00000000..3bbe05be --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/calculatesum.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Calculate Sum functionality" + vars: + preBuildState:
file://assets/go/code/calculatesum.go + changes: file://assets/go/changes/calculatesum.changes.md + problems: file://assets/go/problems/calculatesum.problems.txt + postBuildState: file://assets/go/code/calculatesum.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("if numbers == nil {") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/dataprocessor.py.test.yml b/test/evals/promptfoo-poc/fix/tests/dataprocessor.py.test.yml new file mode 100644 index 00000000..4acc39a2 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/dataprocessor.py.test.yml @@ -0,0 +1,13 @@ +- description: "Verify DataProcessor functionality implementation" + vars: + preBuildState: file://assets/py/code/dataprocessor.py + changes: file://assets/py/changes/dataprocessor.changes.md + problems: file://assets/py/problems/dataprocessor.problems.txt + postBuildState: file://assets/py/code/dataprocessor.post.py + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange and change.new.includes("def processData(self, data):") + ) diff --git a/test/evals/promptfoo-poc/fix/tests/fix.test.yml b/test/evals/promptfoo-poc/fix/tests/fix.go.test.yml similarity index 61% rename from test/evals/promptfoo-poc/fix/tests/fix.test.yml rename to test/evals/promptfoo-poc/fix/tests/fix.go.test.yml index 1084185a..22ed5877 100644 --- a/test/evals/promptfoo-poc/fix/tests/fix.test.yml +++ b/test/evals/promptfoo-poc/fix/tests/fix.go.test.yml @@ -1,12 +1,10 @@ - description: "Check Fix with Line numbers" vars: - preBuildState: file://assets/shared/pre_build.go - changes: file://assets/removal/changes.md - problems: 
file://assets/removal/problems.txt - postBuildState: file://assets/removal/post_build.go + preBuildState: file://assets/go/code/removal.go + changes: file://assets/go/changes/removal.changes.md + problems: file://assets/go/problems/removal.problems.txt + postBuildState: file://assets/go/removal.post.go assert: - - type: is-json - - type: is-valid-openai-tools-call - type: javascript value: | var args = JSON.parse(output[0].function.arguments) diff --git a/test/evals/promptfoo-poc/fix/tests/globalconfig.go.test.yml b/test/evals/promptfoo-poc/fix/tests/globalconfig.go.test.yml new file mode 100644 index 00000000..e66c962f --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/globalconfig.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Global Config functionality" + vars: + preBuildState: file://assets/go/code/globalconfig.go + changes: file://assets/go/changes/globalconfig.changes.md + problems: file://assets/go/problems/globalconfig_problems.txt + postBuildState: file://assets/go/code/globalconfig.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("GlobalConfig holds configuration settings.") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/hello.go.test.yml b/test/evals/promptfoo-poc/fix/tests/hello.go.test.yml new file mode 100644 index 00000000..8b8f32b6 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/hello.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Hello World functionality" + vars: + preBuildState: file://assets/go/code/hello.go + changes: file://assets/go/changes/hello.changes.md + problems: file://assets/go/problems/hello.problems.txt + postBuildState: file://assets/go/code/hello.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: 
javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("func main() {}") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/helloworld.java.test.yml b/test/evals/promptfoo-poc/fix/tests/helloworld.java.test.yml new file mode 100644 index 00000000..63ccb796 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/helloworld.java.test.yml @@ -0,0 +1,13 @@ +- description: "Verify HelloWorld class header and functionality" + vars: + preBuildState: file://assets/java/code/HelloWorld.java + changes: file://assets/java/changes/HelloWorld.changes.md + problems: file://assets/java/problems/HelloWorld.problems.txt + postBuildState: file://assets/java/code/HelloWorld.post.java + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange and change.new.includes("/* This class demonstrates printing \"Hello, World!\" to the console. 
*/") + ) diff --git a/test/evals/promptfoo-poc/fix/tests/index.html.test.yml b/test/evals/promptfoo-poc/fix/tests/index.html.test.yml new file mode 100644 index 00000000..52df0ab8 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/index.html.test.yml @@ -0,0 +1,13 @@ +- description: "Verify HTML page title and paragraph updates" + vars: + preBuildState: file://assets/html/code/index.html + changes: file://assets/html/changes/index.changes.md + problems: file://assets/html/problems/index.problems.txt + postBuildState: file://assets/html/code/index.post.html + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange and change.new.includes("Updated Title") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/loop.go.test.yml b/test/evals/promptfoo-poc/fix/tests/loop.go.test.yml new file mode 100644 index 00000000..42c18cda --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/loop.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Loop functionality" + vars: + preBuildState: file://assets/go/code/loop.go + changes: file://assets/go/changes/loop.changes.md + problems: file://assets/go/problems/loop.problems.txt + postBuildState: file://assets/go/code/loop.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("for i := 0; i < 10; i++ {") + ) + ) diff --git a/test/evals/promptfoo-poc/fix/tests/main.go.test.yml b/test/evals/promptfoo-poc/fix/tests/main.go.test.yml new file mode 100644 index 00000000..31aaa89e --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/main.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Main functionality" + vars: + preBuildState: 
file://assets/go/code/main.go + changes: file://assets/go/changes/main.changes.md + problems: file://assets/go/problems/main.problems.txt + postBuildState: file://assets/go/code/main.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes('fmt.Println("Goodbye, world!")') + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/main.js.test.yml b/test/evals/promptfoo-poc/fix/tests/main.js.test.yml new file mode 100644 index 00000000..89f6bc26 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/main.js.test.yml @@ -0,0 +1,13 @@ +- description: "Verify DatabaseConnector class implementation" + vars: + preBuildState: file://assets/js/code/main.js + changes: file://assets/js/changes/main.changes.md + problems: file://assets/js/problems/main.problems.txt + postBuildState: file://assets/js/code/main.post.js + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 0 && args.changes.some( + change => change.hasChange and change.new.includes("connect()") + ) diff --git a/test/evals/promptfoo-poc/fix/tests/main.py.test.yml b/test/evals/promptfoo-poc/fix/tests/main.py.test.yml new file mode 100644 index 00000000..1491f2e2 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/main.py.test.yml @@ -0,0 +1,13 @@ +- description: "Verify main initialization function implementation" + vars: + preBuildState: file://assets/py/code/main.py + changes: file://assets/py/changes/main.changes.md + problems: file://assets/py/problems/main.problems.txt + postBuildState: file://assets/py/code/main.post.py + assert: + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.problems && args.changes.length > 
0 && args.changes.some( + change => change.hasChange and change.new.includes("def initialize(params):") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/print.go.test.yml b/test/evals/promptfoo-poc/fix/tests/print.go.test.yml new file mode 100644 index 00000000..f068f8a1 --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/print.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Print functionality" + vars: + preBuildState: file://assets/go/code/print.go + changes: file://assets/go/changes/print.changes.md + problems: file://assets/go/problems/print.problems.txt + postBuildState: file://assets/go/code/print.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("// print farewell message") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/fix/tests/removal.go.test.yml b/test/evals/promptfoo-poc/fix/tests/removal.go.test.yml new file mode 100644 index 00000000..9cd670ba --- /dev/null +++ b/test/evals/promptfoo-poc/fix/tests/removal.go.test.yml @@ -0,0 +1,19 @@ +- description: "Check Removal functionality" + vars: + preBuildState: file://assets/go/code/removal.go + changes: file://assets/go/changes/removal.changes.md + problems: file://assets/go/problems/removal_problems.txt + postBuildState: file://assets/go/code/removal.post.go + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return ( + args.problems && + args.changes.length > 0 && + args.changes.some( + change => change.hasChange && change.new.includes("deleteIds[context.Id] = true") + ) + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/feature_x.changes.md 
b/test/evals/promptfoo-poc/verify/assets/go/changes/feature_x.changes.md new file mode 100644 index 00000000..b61739f6 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/feature_x.changes.md @@ -0,0 +1,16 @@ +### Subtask 1: Implement the missing feature X by adding a function that prints 'Feature X implemented'. + +```go +package main + +import "fmt" + +func main() { + fmt.Println("Starting application...") + // TODO: Implement feature X +} + +func featureX() { + fmt.Println("Feature X implemented") +} +``` diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/hello.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/hello.changes.md new file mode 100644 index 00000000..cc09c60d --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/hello.changes.md @@ -0,0 +1,28 @@ +### Subtask 1: Update the 'main' function to add a new 'hello' function that takes a name as an argument and prints 'Hello, !'. + + +```go +package main + +func main() { + fmt.Println("Hello, World!") +} + +func hello(name string) { + fmt.Println("Hello,", name, "!") +} +``` + +### Subtask 2: Replace the existing print statement in 'main' with a call to this new function, passing a default name. + +```go +package main + +func main() { + hello("World") +} + +func hello(name string) { + fmt.Println("Hello,", name, "!") +} +``` diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/hello_date.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/hello_date.changes.md new file mode 100644 index 00000000..74a95582 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/hello_date.changes.md @@ -0,0 +1,32 @@ +### Subtask 1: Update the 'Hello, World!' string to include the current date. + +```go +package main + +import ( + "fmt" + "time" +) + +func main() { + fmt.Println("Hello, World! Today is", time.Now().Format("2006-01-02")) +} +``` + +### Subtask 2: Add error handling for the fmt.Println function. 
+ +```go +package main + +import ( + "fmt" + "time" +) + +func main() { + current_time := time.Now().Format("2006-01-02") + if _, err := fmt.Println("Hello, World! Current date: ", current_time); err != nil { + panic(err) + } +} +``` diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/hello_universe.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/hello_universe.changes.md new file mode 100644 index 00000000..6feff4f4 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/hello_universe.changes.md @@ -0,0 +1,15 @@ +### Subtask 1: Add a new function to print 'Hello Universe' and remove the 'Hello World' print statement. + +```go +package main + +import "fmt" + +func main() { + printHelloUniverse() +} + +func printHelloUniverse() { + fmt.Println("Hello, Universe!") +} +``` diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/main_hello_universe.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/main_hello_universe.changes.md new file mode 100644 index 00000000..e0c622c1 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/main_hello_universe.changes.md @@ -0,0 +1,2 @@ +Add a new function to print 'Hello Universe' and refactor the 'Hello World' print statement into its own function. + diff --git a/test/evals/promptfoo-poc/verify/assets/removal/changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/removal.changes.md similarity index 100% rename from test/evals/promptfoo-poc/verify/assets/removal/changes.md rename to test/evals/promptfoo-poc/verify/assets/go/changes/removal.changes.md diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/reverse.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/reverse.changes.md new file mode 100644 index 00000000..45377cc0 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/reverse.changes.md @@ -0,0 +1,2 @@ +Add a new function to reverse a string and call it with 'Hello, World!' as the parameter. 
+ diff --git a/test/evals/promptfoo-poc/verify/assets/go/changes/sum.changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/sum.changes.md new file mode 100644 index 00000000..a7265f88 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/sum.changes.md @@ -0,0 +1,2 @@ +Implement the call to sum function within main with parameters 5 and 3. Correctly handle the return value. + diff --git a/test/evals/promptfoo-poc/verify/assets/valid/changes.md b/test/evals/promptfoo-poc/verify/assets/go/changes/valid.changes.md similarity index 93% rename from test/evals/promptfoo-poc/verify/assets/valid/changes.md rename to test/evals/promptfoo-poc/verify/assets/go/changes/valid.changes.md index 2703da18..b8b57d51 100644 --- a/test/evals/promptfoo-poc/verify/assets/valid/changes.md +++ b/test/evals/promptfoo-poc/verify/assets/go/changes/valid.changes.md @@ -1,6 +1,6 @@ - ### Subtask 1: Parse the range of indices from the command-line arguments. - - ``` +### Subtask 1: Parse the range of indices from the command-line arguments. 
+ +```go package cmd import ( @@ -41,4 +41,4 @@ } return indices, nil } - ``` \ No newline at end of file +``` diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.go b/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.go new file mode 100644 index 00000000..759854d4 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.go @@ -0,0 +1,8 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: fmt.Println("Starting application...") +pdx-7: // TODO: Implement feature X +pdx-8: } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.post.go new file mode 100644 index 00000000..2e496d4c --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.post.go @@ -0,0 +1,12 @@ +package main + +import "fmt" + +func featureX() { + fmt.Println("Feature X implemented") +} + +func main() { + fmt.Println("Starting application...") + featureX() +} diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello.go new file mode 100644 index 00000000..14899027 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello.go @@ -0,0 +1,8 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: fmt.Println("Hello, world!") +pdx-7: } +pdx-8: diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello.post.go new file mode 100644 index 00000000..b9728b45 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello.post.go @@ -0,0 +1,11 @@ +package main + +import "fmt" + +func main() { + hello("World") +} + +func hello(name string) { + fmt.Println("Hello,", name, "!") +} diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.go new file mode 100644 
index 00000000..95561e7d --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.go @@ -0,0 +1,7 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: fmt.Println("Hello, World!") +pdx-7: } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.post.go new file mode 100644 index 00000000..6c9ab1bf --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.post.go @@ -0,0 +1,13 @@ +package main + +import ( + "fmt" + "time" +) + +func main() { + current_time := time.Now().Format("2006-01-02") + if _, err := fmt.Println("Hello, World! Current date: ", current_time); err != nil { + panic(err) + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.go new file mode 100644 index 00000000..6cadcf1f --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.go @@ -0,0 +1,7 @@ +pdx-1: package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: fmt.Println("Hello World") +pdx-7: } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.post.go new file mode 100644 index 00000000..22ca7b81 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.post.go @@ -0,0 +1,11 @@ +package main + +import "fmt" + +func main() { + printHelloUniverse() +} + +func printHelloUniverse() { + fmt.Println("Hello Universe") +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.go b/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.go new file mode 100644 index 00000000..6cadcf1f --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.go @@ -0,0 +1,7 @@ +pdx-1: 
package main +pdx-2: +pdx-3: import "fmt" +pdx-4: +pdx-5: func main() { +pdx-6: fmt.Println("Hello World") +pdx-7: } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.post.go new file mode 100644 index 00000000..76a3216e --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.post.go @@ -0,0 +1,16 @@ +package main + +import "fmt" + +func printHelloWorld() { + fmt.Println("Hello World") +} + +func printHelloUniverse() { + fmt.Println("Hello Universe") +} + +func main() { + printHelloWorld() + printHelloUniverse() +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/shared/pre_build.go b/test/evals/promptfoo-poc/verify/assets/go/code/removal.go similarity index 100% rename from test/evals/promptfoo-poc/verify/assets/shared/pre_build.go rename to test/evals/promptfoo-poc/verify/assets/go/code/removal.go diff --git a/test/evals/promptfoo-poc/fix/assets/shared/pre_build.go b/test/evals/promptfoo-poc/verify/assets/go/code/removal.post.go similarity index 69% rename from test/evals/promptfoo-poc/fix/assets/shared/pre_build.go rename to test/evals/promptfoo-poc/verify/assets/go/code/removal.post.go index c90eeb79..e9da642e 100644 --- a/test/evals/promptfoo-poc/fix/assets/shared/pre_build.go +++ b/test/evals/promptfoo-poc/verify/assets/go/code/removal.post.go @@ -7,18 +7,36 @@ import ( "plandex/auth" "plandex/lib" "plandex/term" + "strconv" + "strings" "github.com/plandex/plandex/shared" "github.com/spf13/cobra" ) -var contextRmCmd = &cobra.Command{ - Use: "rm", - Aliases: []string{"remove", "unload"}, - Short: "Remove context", - Long: `Remove context by index, name, or glob.`, - Args: cobra.MinimumNArgs(1), - Run: contextRm, +func parseRange(arg string) ([]int, error) { + var indices []int + parts := strings.Split(arg, "-") + if len(parts) == 2 { + start, err := strconv.Atoi(parts[0]) + if err != nil { + 
return nil, err + } + end, err := strconv.Atoi(parts[1]) + if err != nil { + return nil, err + } + for i := start; i <= end; i++ { + indices = append(indices, i) + } + } else { + index, err := strconv.Atoi(arg) + if err != nil { + return nil, err + } + indices = append(indices, index) + } + return indices, nil } func contextRm(cmd *cobra.Command, args []string) { @@ -39,6 +57,20 @@ func contextRm(cmd *cobra.Command, args []string) { deleteIds := map[string]bool{} + for _, arg := range args { + indices, err := parseRange(arg) + if err != nil { + term.OutputErrorAndExit("Error parsing range: %v", err) + } + + for _, index := range indices { + if index > 0 && index <= len(contexts) { + context := contexts[index-1] + deleteIds[context.Id] = true + } + } + } + for i, context := range contexts { for _, id := range args { if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id { @@ -64,7 +96,6 @@ func contextRm(cmd *cobra.Command, args []string) { } parentDir = filepath.Dir(parentDir) // Move up one directory } - } } } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/reverse.go b/test/evals/promptfoo-poc/verify/assets/go/code/reverse.go new file mode 100644 index 00000000..5b3f5569 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/reverse.go @@ -0,0 +1,6 @@ +pdx-1: package main +pdx-2: +pdx-3: func main() { +pdx-4: println("Hello, World!") +pdx-5: } +pdx-6: diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/reverse.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/reverse.post.go new file mode 100644 index 00000000..cf959e27 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/reverse.post.go @@ -0,0 +1,13 @@ +package main + +func main() { + println(reverse("!dlroW ,olleH")) +} + +func reverse(s string) string { + runes := []rune(s) + for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { + runes[i], runes[j] = runes[j], runes[i] + } + return string(runes) +} diff --git 
a/test/evals/promptfoo-poc/verify/assets/go/code/sum.go b/test/evals/promptfoo-poc/verify/assets/go/code/sum.go new file mode 100644 index 00000000..f4990d52 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/sum.go @@ -0,0 +1,9 @@ +pdx-1: package testpack +pdx-2: +pdx-3: func sum(a int, b int) int { +pdx-4: return a + b +pdx-5: } +pdx-6: +pdx-7: func main() { +pdx-8: // Call the sum function +pdx-9: } diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/sum.post.go b/test/evals/promptfoo-poc/verify/assets/go/code/sum.post.go new file mode 100644 index 00000000..aef9d4c4 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/sum.post.go @@ -0,0 +1,10 @@ +package testpack + +func sum(a int, b int) int { + return a + b +} + +func main() { + result := sum(5, 3) + // TODO: Use the result +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/code/valid.go b/test/evals/promptfoo-poc/verify/assets/go/code/valid.go new file mode 100644 index 00000000..7979a199 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/code/valid.go @@ -0,0 +1,92 @@ +pdx-1: package cmd +pdx-2: +pdx-3: import ( +pdx-4: "fmt" +pdx-5: "path/filepath" +pdx-6: "plandex/api" +pdx-7: "plandex/auth" +pdx-8: "plandex/lib" +pdx-9: "plandex/term" +pdx-10: +pdx-11: "github.com/plandex/plandex/shared" +pdx-12: "github.com/spf13/cobra" +pdx-13: ) +pdx-14: +pdx-15: var contextRmCmd = &cobra.Command{ +pdx-16: Use: "rm", +pdx-17: Aliases: []string{"remove", "unload"}, +pdx-18: Short: "Remove context", +pdx-19: Long: `Remove context by index, name, or glob.`, +pdx-20: Args: cobra.MinimumNArgs(1), +pdx-21: Run: contextRm, +pdx-22: } +pdx-23: +pdx-24: func contextRm(cmd *cobra.Command, args []string) { +pdx-25: auth.MustResolveAuthWithOrg() +pdx-26: lib.MustResolveProject() +pdx-27: +pdx-28: if lib.CurrentPlanId == "" { +pdx-29: fmt.Println("🤷‍♂️ No current plan") +pdx-30: return +pdx-31: } +pdx-32: +pdx-33: term.StartSpinner("") +pdx-34: 
contexts, err := api.Client.ListContext(lib.CurrentPlanId, lib.CurrentBranch) +pdx-35: +pdx-36: if err != nil { +pdx-37: term.OutputErrorAndExit("Error retrieving context: %v", err) +pdx-38: } +pdx-39: +pdx-40: deleteIds := map[string]bool{} +pdx-41: +pdx-42: for i, context := range contexts { +pdx-43: for _, id := range args { +pdx-44: if fmt.Sprintf("%d", i+1) == id || context.Name == id || context.FilePath == id || context.Url == id { +pdx-45: deleteIds[context.Id] = true +pdx-46: break +pdx-47: } else if context.FilePath != "" { +pdx-48: // Check if id is a glob pattern +pdx-49: matched, err := filepath.Match(id, context.FilePath) +pdx-50: if err != nil { +pdx-51: term.OutputErrorAndExit("Error matching glob pattern: %v", err) +pdx-52: } +pdx-53: if matched { +pdx-54: deleteIds[context.Id] = true +pdx-55: break +pdx-56: } +pdx-57: +pdx-58: // Check if id is a parent directory +pdx-59: parentDir := context.FilePath +pdx-60: for parentDir != "." && parentDir != "/" && parentDir != "" { +pdx-61: if parentDir == id { +pdx-62: deleteIds[context.Id] = true +pdx-63: break +pdx-64: } +pdx-65: parentDir = filepath.Dir(parentDir) // Move up one directory +pdx-66: } +pdx-67: +pdx-68: } +pdx-69: } +pdx-70: } +pdx-71: +pdx-72: if len(deleteIds) > 0 { +pdx-73: res, err := api.Client.DeleteContext(lib.CurrentPlanId, lib.CurrentBranch, shared.DeleteContextRequest{ +pdx-74: Ids: deleteIds, +pdx-75: }) +pdx-76: term.StopSpinner() +pdx-77: +pdx-78: if err != nil { +pdx-79: term.OutputErrorAndExit("Error deleting context: %v", err) +pdx-80: } +pdx-81: +pdx-82: fmt.Println("✅ " + res.Msg) +pdx-83: } else { +pdx-84: term.StopSpinner() +pdx-85: fmt.Println("🤷‍♂️ No context removed") +pdx-86: } +pdx-87: } +pdx-88: +pdx-89: func init() { +pdx-90: RootCmd.AddCommand(contextRmCmd) +pdx-91: } +pdx-92: \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/valid/post_build.go b/test/evals/promptfoo-poc/verify/assets/go/code/valid.post.go similarity index 100% 
rename from test/evals/promptfoo-poc/verify/assets/valid/post_build.go rename to test/evals/promptfoo-poc/verify/assets/go/code/valid.post.go diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/feature_x.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/feature_x.diff.txt new file mode 100644 index 00000000..a9e5d8ba --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/feature_x.diff.txt @@ -0,0 +1,15 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.go 2024-07-29 18:43:10.438097196 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/feature_x.post.go 2024-07-29 18:25:52.510325316 +0100 +@@ -2,7 +2,11 @@ + + import "fmt" + ++func featureX() { ++ fmt.Println("Feature X implemented") ++} ++ + func main() { + fmt.Println("Starting application...") +- // TODO: Implement feature X ++ featureX() + } diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/hello.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello.diff.txt new file mode 100644 index 00000000..7f93371a --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello.diff.txt @@ -0,0 +1,13 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello.go 2024-07-29 18:43:45.794858671 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello.post.go 2024-07-29 18:15:19.250753752 +0100 +@@ -3,6 +3,9 @@ + import "fmt" + + func main() { +- fmt.Println("Hello, world!") ++ hello("World") + } + ++func hello(name string) { ++ fmt.Println("Hello,", name, "!") ++} diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_date.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_date.diff.txt new file mode 100644 index 00000000..2be4d663 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_date.diff.txt @@ -0,0 +1,20 @@ +--- 
/mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.go 2024-07-29 18:43:21.838342068 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello_date.post.go 2024-07-19 16:33:23.976712064 +0100 +@@ -1,7 +1,13 @@ + package main + +-import "fmt" ++import ( ++ "fmt" ++ "time" ++) + + func main() { +- fmt.Println("Hello, World!") +-} ++ current_time := time.Now().Format("2006-01-02") ++ if _, err := fmt.Println("Hello, World! Current date: ", current_time); err != nil { ++ panic(err) ++ } ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_universe.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_universe.diff.txt new file mode 100644 index 00000000..8b8b3a42 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/hello_universe.diff.txt @@ -0,0 +1,14 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.go 2024-07-29 18:43:29.362504031 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/hello_universe.post.go 2024-07-19 16:33:23.972711964 +0100 +@@ -3,5 +3,9 @@ + import "fmt" + + func main() { +- fmt.Println("Hello World") ++ printHelloUniverse() + } ++ ++func printHelloUniverse() { ++ fmt.Println("Hello Universe") ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/main_hello_universe.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/main_hello_universe.diff.txt new file mode 100644 index 00000000..f1211190 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/main_hello_universe.diff.txt @@ -0,0 +1,20 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.go 2024-07-29 18:43:54.035036980 +0100 ++++ 
/mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/main_hello_universe.post.go 2024-07-19 16:33:23.976712064 +0100 +@@ -2,6 +2,15 @@ + + import "fmt" + +-func main() { ++func printHelloWorld() { + fmt.Println("Hello World") + } ++ ++func printHelloUniverse() { ++ fmt.Println("Hello Universe") ++} ++ ++func main() { ++ printHelloWorld() ++ printHelloUniverse() ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/removal/diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/removal.diff.txt similarity index 100% rename from test/evals/promptfoo-poc/verify/assets/removal/diff.txt rename to test/evals/promptfoo-poc/verify/assets/go/diffs/removal.diff.txt diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/reverse.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/reverse.diff.txt new file mode 100644 index 00000000..b4419f7f --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/reverse.diff.txt @@ -0,0 +1,18 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/reverse.go 2024-07-29 18:44:00.443175861 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/reverse.post.go 2024-07-19 16:33:23.972711964 +0100 +@@ -1,6 +1,13 @@ + package main + + func main() { +- println("Hello, World!") ++ println(reverse("!dlroW ,olleH")) ++} ++ ++func reverse(s string) string { ++ runes := []rune(s) ++ for i, j := 0, len(runes)-1; i < j; i, j = i+1, j-1 { ++ runes[i], runes[j] = runes[j], runes[i] ++ } ++ return string(runes) + } +- diff --git a/test/evals/promptfoo-poc/verify/assets/go/diffs/sum.diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/sum.diff.txt new file mode 100644 index 00000000..0a2335d3 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/go/diffs/sum.diff.txt @@ -0,0 +1,12 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/sum.go 2024-07-29 
18:44:07.635331952 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/go/code/sum.post.go 2024-07-19 16:33:23.972711964 +0100 +@@ -5,5 +5,6 @@ + } + + func main() { +- // Call the sum function +-} ++ result := sum(5, 3) ++ // TODO: Use the result ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/valid/diff.txt b/test/evals/promptfoo-poc/verify/assets/go/diffs/valid.diff.txt similarity index 100% rename from test/evals/promptfoo-poc/verify/assets/valid/diff.txt rename to test/evals/promptfoo-poc/verify/assets/go/diffs/valid.diff.txt diff --git a/test/evals/promptfoo-poc/verify/assets/java/changes/calculator.changes.md b/test/evals/promptfoo-poc/verify/assets/java/changes/calculator.changes.md new file mode 100644 index 00000000..1b081f86 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/changes/calculator.changes.md @@ -0,0 +1,2 @@ +Implement the subtract method. + diff --git a/test/evals/promptfoo-poc/verify/assets/java/changes/hello_world.changes.md b/test/evals/promptfoo-poc/verify/assets/java/changes/hello_world.changes.md new file mode 100644 index 00000000..f62dd6b2 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/changes/hello_world.changes.md @@ -0,0 +1,4 @@ +### Subtask 1: Translate the code from Java to Python. + +### Subtask 2: Include a comment suggesting future improvements. + diff --git a/test/evals/promptfoo-poc/verify/assets/java/changes/solution.changes.md b/test/evals/promptfoo-poc/verify/assets/java/changes/solution.changes.md new file mode 100644 index 00000000..52e36087 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/changes/solution.changes.md @@ -0,0 +1,2 @@ +Introduce a syntax error by missing a semicolon in the sort line. 
+ diff --git a/test/evals/promptfoo-poc/verify/assets/java/changes/start.changes.md b/test/evals/promptfoo-poc/verify/assets/java/changes/start.changes.md new file mode 100644 index 00000000..95004d18 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/changes/start.changes.md @@ -0,0 +1,2 @@ +Introduce a new method 'greet' that prints a greeting message. Modify 'main' to call this new method. Ensure to catch any syntax errors. + diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/calculator.java b/test/evals/promptfoo-poc/verify/assets/java/code/calculator.java new file mode 100644 index 00000000..41ae8d09 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/calculator.java @@ -0,0 +1,9 @@ +pdx-1: class Calculator { +pdx-2: private int result = 0; +pdx-3: +pdx-4: public void add(int number) { +pdx-5: result += number; +pdx-6: } +pdx-7: +pdx-8: // TODO: Subtract method +pdx-9: } diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/calculator.post.java b/test/evals/promptfoo-poc/verify/assets/java/code/calculator.post.java new file mode 100644 index 00000000..dafa0179 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/calculator.post.java @@ -0,0 +1,11 @@ +class Calculator { + private int result = 0; + + public void add(int number) { + result += number; + } + + public void subtract(int number) { + result -= number; + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.java b/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.java new file mode 100644 index 00000000..15d7442a --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.java @@ -0,0 +1,5 @@ +pdx-1: public class HelloWorld { +pdx-2: public static void main(String[] args) { +pdx-3: System.out.println("Hello, world!"); +pdx-4: } +pdx-5: } diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.post.py 
b/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.post.py new file mode 100644 index 00000000..693e72af --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.post.py @@ -0,0 +1,6 @@ +# Future improvements: optimize greeting efficiency +def main(): + print("Hello, world!") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/solution.java b/test/evals/promptfoo-poc/verify/assets/java/code/solution.java new file mode 100644 index 00000000..29dd507d --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/solution.java @@ -0,0 +1,10 @@ +pdx-1: import java.util.*; +pdx-2: +pdx-3: class Solution { +pdx-4: public static void main(String[] args) { +pdx-5: System.out.println("Array before sort: " + Arrays.toString(args)); +pdx-6: Arrays.sort(args); +pdx-7: System.out.println("Array after sort: " + Arrays.toString(args)); +pdx-8: } +pdx-9: } +pdx-10: diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/solution.post.java b/test/evals/promptfoo-poc/verify/assets/java/code/solution.post.java new file mode 100644 index 00000000..f233f7fb --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/solution.post.java @@ -0,0 +1,9 @@ +import java.util.*; + +class Solution { + public static void main(String[] args) { + System.out.println("Array before sort: " + Arrays.toString(args)) + Arrays.sort(args); + System.out.println("Array after sort: " + Arrays.toString(args)); + } +} diff --git a/test/evals/promptfoo-poc/verify/assets/java/code/start.java b/test/evals/promptfoo-poc/verify/assets/java/code/start.java new file mode 100644 index 00000000..c2dfdaae --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/start.java @@ -0,0 +1,6 @@ +pdx-1: import java.util.*; +pdx-2: public class Example { +pdx-3: public static void main(String[] args) { +pdx-4: System.out.println("Starting application..."); +pdx-5: } +pdx-6: } diff 
--git a/test/evals/promptfoo-poc/verify/assets/java/code/start.post.java b/test/evals/promptfoo-poc/verify/assets/java/code/start.post.java new file mode 100644 index 00000000..fe480b68 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/code/start.post.java @@ -0,0 +1,9 @@ +import java.util.*; +public class Example { + public static void main(String[] args) { + greet(); + } + public static void greet() { + System.out.println("Greetings from the application."); + } +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/java/diffs/calculator.diff.txt b/test/evals/promptfoo-poc/verify/assets/java/diffs/calculator.diff.txt new file mode 100644 index 00000000..46a0ec68 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/diffs/calculator.diff.txt @@ -0,0 +1,13 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/calculator.java 2024-07-29 18:52:20.206428433 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/calculator.post.java 2024-07-19 16:33:23.976712064 +0100 +@@ -5,5 +5,7 @@ + result += number; + } + +- // TODO: Subtract method +-} ++ public void subtract(int number) { ++ result -= number; ++ } ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/java/diffs/hello_world.diff.txt b/test/evals/promptfoo-poc/verify/assets/java/diffs/hello_world.diff.txt new file mode 100644 index 00000000..e0199cb9 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/diffs/hello_world.diff.txt @@ -0,0 +1,15 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.java 2024-07-29 18:52:27.402594822 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/hello_world.post.java 2024-07-19 16:33:37.025039933 +0100 +@@ -1,5 +1,6 @@ +-public class HelloWorld { +- public static void main(String[] args) { +- System.out.println("Hello, 
world!"); +- } +-} ++# Future improvements: optimize greeting efficiency ++def main(): ++ print("Hello, world!") ++ ++if __name__ == "__main__": ++ main() +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/java/diffs/solution.diff.txt b/test/evals/promptfoo-poc/verify/assets/java/diffs/solution.diff.txt new file mode 100644 index 00000000..693f0592 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/diffs/solution.diff.txt @@ -0,0 +1,13 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/solution.java 2024-07-29 18:52:39.986885996 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/solution.post.java 2024-07-19 16:33:23.972711964 +0100 +@@ -2,9 +2,8 @@ + + class Solution { + public static void main(String[] args) { +- System.out.println("Array before sort: " + Arrays.toString(args)); ++ System.out.println("Array before sort: " + Arrays.toString(args)) + Arrays.sort(args); + System.out.println("Array after sort: " + Arrays.toString(args)); + } + } +- diff --git a/test/evals/promptfoo-poc/verify/assets/java/diffs/start.diff.txt b/test/evals/promptfoo-poc/verify/assets/java/diffs/start.diff.txt new file mode 100644 index 00000000..edcea0c7 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/java/diffs/start.diff.txt @@ -0,0 +1,15 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/start.java 2024-07-29 18:52:45.519014083 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/java/code/start.post.java 2024-07-19 16:33:23.972711964 +0100 +@@ -1,6 +1,9 @@ + import java.util.*; + public class Example { + public static void main(String[] args) { +- System.out.println("Starting application..."); ++ greet(); + } +-} ++ public static void greet() { ++ System.out.println("Greetings from the application."); ++ } ++} +\ No newline at end of file diff --git 
a/test/evals/promptfoo-poc/verify/assets/py/changes/greet.changes.md b/test/evals/promptfoo-poc/verify/assets/py/changes/greet.changes.md new file mode 100644 index 00000000..948bad25 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/changes/greet.changes.md @@ -0,0 +1,2 @@ +Refactor the greet function into a class with a static method, but introduce duplicate class definitions. + diff --git a/test/evals/promptfoo-poc/verify/assets/py/changes/json.changes.md b/test/evals/promptfoo-poc/verify/assets/py/changes/json.changes.md new file mode 100644 index 00000000..4fba3fa1 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/changes/json.changes.md @@ -0,0 +1,2 @@ +Complete the 'convert_to_json' function to correctly convert a Python dictionary into a JSON string. Ensure proper error handling. + diff --git a/test/evals/promptfoo-poc/verify/assets/py/changes/main.changes.md b/test/evals/promptfoo-poc/verify/assets/py/changes/main.changes.md new file mode 100644 index 00000000..939b1568 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/changes/main.changes.md @@ -0,0 +1,2 @@ +Refactor the 'main' function to include error handling for a potential FileNotFoundError. This change should preserve the initial print statement but should add a try-except block around it. 
+ diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/greet.post.py b/test/evals/promptfoo-poc/verify/assets/py/code/greet.post.py new file mode 100644 index 00000000..996b7c67 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/code/greet.post.py @@ -0,0 +1,9 @@ +class Greeter: + @staticmethod + def greet(name): + print(f"Hello, {name}!") + +class Greeter: + @staticmethod + def greet(name): + print(f"Welcome, {name}!") diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/greet.py b/test/evals/promptfoo-poc/verify/assets/py/code/greet.py new file mode 100644 index 00000000..5d9fbe2a --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/code/greet.py @@ -0,0 +1,2 @@ +pdx-1: def greet(name): +pdx-2: print(f"Hello, {name}!") diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/json.post.py b/test/evals/promptfoo-poc/verify/assets/py/code/json.post.py new file mode 100644 index 00000000..fbb0d4d5 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/code/json.post.py @@ -0,0 +1,8 @@ +import json + +def convert_to_json(data): + try: + json_string = json.dumps(data) + return json_string + except TypeError as e: + return str(e) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/json.py b/test/evals/promptfoo-poc/verify/assets/py/code/json.py new file mode 100644 index 00000000..d087a9b7 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/code/json.py @@ -0,0 +1,5 @@ +pdx-1: import json +pdx-2: +pdx-3: def convert_to_json(data): +pdx-4: # incomplete function needs proper implementation +pdx-5: pass diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/main.post.py b/test/evals/promptfoo-poc/verify/assets/py/code/main.post.py new file mode 100644 index 00000000..e69de29b diff --git a/test/evals/promptfoo-poc/verify/assets/py/code/main.py b/test/evals/promptfoo-poc/verify/assets/py/code/main.py new file mode 100644 index 00000000..87669330 --- /dev/null +++ 
b/test/evals/promptfoo-poc/verify/assets/py/code/main.py @@ -0,0 +1,6 @@ +pdx-1: def main(): +pdx-2: print("Initial setup complete") +pdx-3: +pdx-4: if __name__ == "__main__": +pdx-5: main() +pdx-6: diff --git a/test/evals/promptfoo-poc/verify/assets/py/diffs/greet.diff.txt b/test/evals/promptfoo-poc/verify/assets/py/diffs/greet.diff.txt new file mode 100644 index 00000000..a1d814bf --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/diffs/greet.diff.txt @@ -0,0 +1,14 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/greet.py 2024-07-29 18:54:05.768877313 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/greet.post.py 2024-07-19 16:33:23.976712064 +0100 +@@ -1,2 +1,9 @@ +-def greet(name): +- print(f"Hello, {name}!") ++class Greeter: ++ @staticmethod ++ def greet(name): ++ print(f"Hello, {name}!") ++ ++class Greeter: ++ @staticmethod ++ def greet(name): ++ print(f"Welcome, {name}!") diff --git a/test/evals/promptfoo-poc/verify/assets/py/diffs/json.diff.txt b/test/evals/promptfoo-poc/verify/assets/py/diffs/json.diff.txt new file mode 100644 index 00000000..866b4f1c --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/py/diffs/json.diff.txt @@ -0,0 +1,14 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/json.py 2024-07-29 18:54:12.349030502 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/json.post.py 2024-07-19 16:33:23.972711964 +0100 +@@ -1,5 +1,8 @@ + import json + + def convert_to_json(data): +- # incomplete function needs proper implementation +- pass ++ try: ++ json_string = json.dumps(data) ++ return json_string ++ except TypeError as e: ++ return str(e) +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/py/diffs/main.diff.txt b/test/evals/promptfoo-poc/verify/assets/py/diffs/main.diff.txt new file mode 100644 index 00000000..5c0d4dd7 --- /dev/null 
+++ b/test/evals/promptfoo-poc/verify/assets/py/diffs/main.diff.txt @@ -0,0 +1,9 @@ +--- /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/main.py 2024-07-29 18:54:17.917160176 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/py/code/main.post.py 2024-07-19 16:33:23.972711964 +0100 +@@ -1,6 +0,0 @@ +-def main(): +- print("Initial setup complete") +- +-if __name__ == "__main__": +- main() +- diff --git a/test/evals/promptfoo-poc/verify/assets/ts/changes/add.changes.md b/test/evals/promptfoo-poc/verify/assets/ts/changes/add.changes.md new file mode 100644 index 00000000..26e58dd0 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/ts/changes/add.changes.md @@ -0,0 +1,4 @@ +### Subtask 1: Add TypeScript type annotations. + +### Subtask 2: Implement error handling for non-numeric inputs. + diff --git a/test/evals/promptfoo-poc/verify/assets/ts/code/add.post.ts b/test/evals/promptfoo-poc/verify/assets/ts/code/add.post.ts new file mode 100644 index 00000000..b517846e --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/ts/code/add.post.ts @@ -0,0 +1,6 @@ +function add(a: number, b: number): number { + if (typeof a !== 'number' || typeof b !== 'number') { + throw new Error('Input must be numbers'); + } + return a + b; +} \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/assets/ts/code/add.ts b/test/evals/promptfoo-poc/verify/assets/ts/code/add.ts new file mode 100644 index 00000000..651fa996 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/ts/code/add.ts @@ -0,0 +1,3 @@ +pdx-1: function add(a, b) { +pdx-2: return a + b; +pdx-3: } diff --git a/test/evals/promptfoo-poc/verify/assets/ts/diffs/add.diff.txt b/test/evals/promptfoo-poc/verify/assets/ts/diffs/add.diff.txt new file mode 100644 index 00000000..2a1d7231 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/assets/ts/diffs/add.diff.txt @@ -0,0 +1,12 @@ +--- 
/mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/ts/code/add.ts 2024-07-29 19:59:02.908860048 +0100 ++++ /mnt/data/Projects/Clients/plandex/test/evals/promptfoo-poc/verify/assets/ts/code/add.post.ts 2024-07-19 16:33:23.976712064 +0100 +@@ -1,3 +1,6 @@ +-function add(a, b) { ++function add(a: number, b: number): number { ++ if (typeof a !== 'number' || typeof b !== 'number') { ++ throw new Error('Input must be numbers'); ++ } + return a + b; +-} ++} +\ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml index 62cee381..26a29fb9 100644 --- a/test/evals/promptfoo-poc/verify/promptfooconfig.yaml +++ b/test/evals/promptfoo-poc/verify/promptfooconfig.yaml @@ -6,4 +6,8 @@ prompts: - file://verify.prompt.txt providers: - file://verify.provider.yml +defaultTest: + assert: + - type: is-json + - type: is-valid-openai-tools-call tests: tests/*.test.yml diff --git a/test/evals/promptfoo-poc/verify/tests/add.ts.test.yml b/test/evals/promptfoo-poc/verify/tests/add.ts.test.yml new file mode 100644 index 00000000..2155a39c --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/add.ts.test.yml @@ -0,0 +1,15 @@ +- description: "Verify add functionality implementation" + vars: + preBuildState: file://assets/ts/code/add.ts + changes: file://assets/ts/changes/add.changes.md + postBuildState: file://assets/ts/code/add.post.ts + diffs: file://assets/ts/diffs/add.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange and change.new.includes("function add(a: number, b: number): number") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/calculator.java.test.yml b/test/evals/promptfoo-poc/verify/tests/calculator.java.test.yml new file mode 100644 index 00000000..11fafb97 --- 
/dev/null +++ b/test/evals/promptfoo-poc/verify/tests/calculator.java.test.yml @@ -0,0 +1,15 @@ +- description: "Verify calculator functionality implementation" + vars: + preBuildState: file://assets/java/code/calculator.java + changes: file://assets/java/changes/calculator.changes.md + postBuildState: file://assets/java/code/calculator.post.java + diffs: file://assets/java/diffs/calculator.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("public int subtract(int a, int b)") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/feature_x.go.test.yml b/test/evals/promptfoo-poc/verify/tests/feature_x.go.test.yml new file mode 100644 index 00000000..4e096d25 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/feature_x.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Feature X implementation" + vars: + preBuildState: file://assets/go/code/feature_x.go + changes: file://assets/go/changes/feature_x.changes.md + postBuildState: file://assets/go/code/feature_x.post.go + diffs: file://assets/go/diffs/feature_x.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("fmt.Println(\"Feature X implemented\")") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/hello.go.test.yml b/test/evals/promptfoo-poc/verify/tests/hello.go.test.yml new file mode 100644 index 00000000..42d0ae20 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/hello.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Hello function implementation" + vars: + preBuildState: file://assets/go/code/hello.go + 
changes: file://assets/go/changes/hello.changes.md + postBuildState: file://assets/go/code/hello.post.go + diffs: file://assets/go/diffs/hello.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("hello(\"World\")") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/hello_date.go.test.yml b/test/evals/promptfoo-poc/verify/tests/hello_date.go.test.yml new file mode 100644 index 00000000..dfded0d7 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/hello_date.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Hello Date function implementation" + vars: + preBuildState: file://assets/go/code/hello_date.go + changes: file://assets/go/changes/hello_date.changes.md + postBuildState: file://assets/go/code/hello_date.post.go + diffs: file://assets/go/diffs/hello_date.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("fmt.Println(\"Hello, World! 
Current date:\", current_time)") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/hello_universe.go.test.yml b/test/evals/promptfoo-poc/verify/tests/hello_universe.go.test.yml new file mode 100644 index 00000000..ade7cce6 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/hello_universe.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Hello Universe function implementation" + vars: + preBuildState: file://assets/go/code/hello_universe.go + changes: file://assets/go/changes/hello_universe.changes.md + postBuildState: file://assets/go/code/hello_universe.post.go + diffs: file://assets/go/diffs/hello_universe.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("fmt.Println(\"Hello, Universe!\")") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/hello_world.java.test.yml b/test/evals/promptfoo-poc/verify/tests/hello_world.java.test.yml new file mode 100644 index 00000000..14e5e82b --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/hello_world.java.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Hello World functionality implementation" + vars: + preBuildState: file://assets/java/code/hello_world.java + changes: file://assets/java/changes/hello_world.changes.md + postBuildState: file://assets/java/code/hello_world.post.py + diffs: file://assets/java/diffs/hello_world.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("# TODO: Suggest future improvements") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/json.py.test.yml 
b/test/evals/promptfoo-poc/verify/tests/json.py.test.yml new file mode 100644 index 00000000..b1120412 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/json.py.test.yml @@ -0,0 +1,15 @@ +- description: "Verify JSON conversion functionality implementation" + vars: + preBuildState: file://assets/py/code/json.py + changes: file://assets/py/changes/json.changes.md + postBuildState: file://assets/py/code/json.post.py + diffs: file://assets/py/diffs/json.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange and change.new.includes("def convert_to_json(data):") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/main_hello_universe.go.test.yml b/test/evals/promptfoo-poc/verify/tests/main_hello_universe.go.test.yml new file mode 100644 index 00000000..e8aa08f0 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/main_hello_universe.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Main Hello Universe function implementation" + vars: + preBuildState: file://assets/go/code/main_hello_universe.go + changes: file://assets/go/changes/main_hello_universe.changes.md + postBuildState: file://assets/go/code/main_hello_universe.post.go + diffs: file://assets/go/diffs/main_hello_universe.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("fmt.Println(\"Hello, Universe!\")") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/removal.go.test.yml b/test/evals/promptfoo-poc/verify/tests/removal.go.test.yml new file mode 100644 index 00000000..d161c557 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/removal.go.test.yml @@ -0,0 +1,15 @@ +- description: 
"Removal of code errors" + vars: + postBuildState: file://assets/go/code/removal.post.go + preBuildState: file://assets/go/code/removal.go + changes: file://assets/go/changes/removal.changes.md + diffs: file://assets/go/diffs/removal.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("deleteIds[context.Id] = true") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/removal.test.yml b/test/evals/promptfoo-poc/verify/tests/removal.test.yml deleted file mode 100644 index d48fbf95..00000000 --- a/test/evals/promptfoo-poc/verify/tests/removal.test.yml +++ /dev/null @@ -1,13 +0,0 @@ -- description: "Removal of code errors" - vars: - preBuildState: file://assets/shared/pre_build.go - changes: file://assets/removal/changes.md - postBuildState: file://assets/removal/post_build.go - diffs: file://assets/removal/diff.txt - assert: - - type: is-json - - type: is-valid-openai-tools-call - - type: javascript - value: | - var args = JSON.parse(output[0].function.arguments) - return args.hasRemovedCodeErrors \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/reverse.go.test.yml b/test/evals/promptfoo-poc/verify/tests/reverse.go.test.yml new file mode 100644 index 00000000..c1731e0a --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/reverse.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify Reverse function implementation" + vars: + preBuildState: file://assets/go/code/reverse.go + changes: file://assets/go/changes/reverse.changes.md + postBuildState: file://assets/go/code/reverse.post.go + diffs: file://assets/go/diffs/reverse.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return 
args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("reverse(\"Hello, World!\")") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/solution.java.test.yml b/test/evals/promptfoo-poc/verify/tests/solution.java.test.yml new file mode 100644 index 00000000..8a80de05 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/solution.java.test.yml @@ -0,0 +1,15 @@ +- description: "Verify solution functionality implementation" + vars: + preBuildState: file://assets/java/code/solution.java + changes: file://assets/java/changes/solution.changes.md + postBuildState: file://assets/java/code/solution.post.java + diffs: file://assets/java/diffs/solution.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange and change.new.includes("Collections.sort(list)") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/start.java.test.yml b/test/evals/promptfoo-poc/verify/tests/start.java.test.yml new file mode 100644 index 00000000..425adc3f --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/start.java.test.yml @@ -0,0 +1,15 @@ +- description: "Verify start functionality implementation" + vars: + preBuildState: file://assets/java/code/start.java + changes: file://assets/java/changes/start.changes.md + postBuildState: file://assets/java/code/start.post.java + diffs: file://assets/java/diffs/start.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange and change.new.includes("public void greet()") + ) \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/sum.go.test.yml b/test/evals/promptfoo-poc/verify/tests/sum.go.test.yml new 
file mode 100644 index 00000000..19f57a08 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/sum.go.test.yml @@ -0,0 +1,15 @@ +- description: "Verify sum function implementation" + vars: + preBuildState: file://assets/go/code/sum.go + changes: file://assets/go/changes/sum.changes.md + postBuildState: file://assets/go/code/sum.post.go + diffs: file://assets/go/diffs/sum.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("sum(5, 3)") + ) diff --git a/test/evals/promptfoo-poc/verify/tests/todo/empty_function_bodies.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/empty_function_bodies.test.yml new file mode 100644 index 00000000..0fe573fb --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/empty_function_bodies.test.yml @@ -0,0 +1,16 @@ +- description: "Test for Empty Function Bodies" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: not-contains + value: "{}" + - type: javascript + value: "output.includes('{}') === false && !output.match(/function\\(.*\\)\\s*{\\s*}/)" + - type: levenshtein + value: '{{postBuildState}}' + threshold: 10 diff --git a/test/evals/promptfoo-poc/verify/tests/todo/existing_code.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/existing_code.test.yml new file mode 100644 index 00000000..ab332158 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/existing_code.test.yml @@ -0,0 +1,18 @@ +- description: "Test for Existing Code Reference" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: 
file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: not-contains + value: "... existing code ..." + - type: javascript + value: "output.includes('... existing code ...') === false" + - type: llm-rubric + value: "Code should not contain the phrase '... existing code ...', or variants thereof, and must include executable statements instead." + - type: levenshtein + value: '{{postBuildState}}' + threshold: 10 diff --git a/test/evals/promptfoo-poc/verify/tests/todo/is_equal.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/is_equal.test.yml new file mode 100644 index 00000000..6ac9b427 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/is_equal.test.yml @@ -0,0 +1,11 @@ +- description: "Test for equality of two files" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: equals + value: '{{postBuildState}}' \ No newline at end of file diff --git a/test/evals/promptfoo-poc/verify/tests/todo/placeholder_content.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/placeholder_content.test.yml new file mode 100644 index 00000000..b879a7ac --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/placeholder_content.test.yml @@ -0,0 +1,21 @@ +- description: "Test for Placeholder Content" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: not-contains + value: "PLACEHOLDER" + - type: not-contains + value: "TBD" + - type: javascript + value: "output.includes('PLACEHOLDER') === false && output.includes('TBD') === false" + - type: 
context-faithfulness + value: "Generated code should accurately reflect the provided context." + - type: context-recall + value: "Key elements from the pre-build state should appear in the generated code." + - type: context-relevance + value: "Generated code should be relevant to the provided pre-build state." diff --git a/test/evals/promptfoo-poc/verify/tests/todo/template_insertion.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/template_insertion.test.yml new file mode 100644 index 00000000..10ad4cd1 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/template_insertion.test.yml @@ -0,0 +1,18 @@ +- description: "Test for Template Insertion" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: not-contains + value: "{{" + - type: not-contains + value: "}}" + - type: javascript + value: "output.includes('{{') === false && output.includes('}}') === false" + - type: regex + value: "^(?!.*\\{\\{.*\\}\\}).*$" # Ensure no templated content is present + diff --git a/test/evals/promptfoo-poc/verify/tests/todo/todo_execution.test.yml b/test/evals/promptfoo-poc/verify/tests/todo/todo_execution.test.yml new file mode 100644 index 00000000..22a48630 --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/todo/todo_execution.test.yml @@ -0,0 +1,15 @@ +- description: "Test for TODO Execution" + vars: + postBuildState: file://assets/valid/post_build.go + preBuildState: file://assets/shared/pre_build.go + changes: file://assets/valid/changes.md + diffs: file://assets/valid/diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: not-contains + value: "TODO" + - type: javascript + value: "output.includes('TODO') === false" + - type: llm-rubric + value: "Code should not contain TODO items and must execute all tasks reliably." 
diff --git a/test/evals/promptfoo-poc/verify/tests/validate.test.yml b/test/evals/promptfoo-poc/verify/tests/validate.go.test.yml similarity index 66% rename from test/evals/promptfoo-poc/verify/tests/validate.test.yml rename to test/evals/promptfoo-poc/verify/tests/validate.go.test.yml index 13eab397..c6b87c99 100644 --- a/test/evals/promptfoo-poc/verify/tests/validate.test.yml +++ b/test/evals/promptfoo-poc/verify/tests/validate.go.test.yml @@ -1,9 +1,9 @@ - description: "Validation of the code changes" vars: - preBuildState: file://assets/shared/pre_build.go - changes: file://assets/valid/changes.md - postBuildState: file://assets/valid/post_build.go - diffs: file://assets/valid/diff.txt + postBuildState: file://assets/go/code/valid.post.go + preBuildState: file://assets/go/code/valid.go + changes: file://assets/go/changes/valid.changes.md + diffs: file://assets/go/diffs/valid.diff.txt assert: - type: is-json - type: is-valid-openai-tools-call diff --git a/test/evals/promptfoo-poc/verify/tests/verify.py.test.yml b/test/evals/promptfoo-poc/verify/tests/verify.py.test.yml new file mode 100644 index 00000000..8d36943c --- /dev/null +++ b/test/evals/promptfoo-poc/verify/tests/verify.py.test.yml @@ -0,0 +1,15 @@ +- description: "Verify greet functionality implementation" + vars: + preBuildState: file://assets/py/code/greet.py + changes: file://assets/py/changes/greet.changes.md + postBuildState: file://assets/py/code/greet.post.py + diffs: file://assets/py/diffs/greet.diff.txt + assert: + - type: is-json + - type: is-valid-openai-tools-call + - type: javascript + value: | + var args = JSON.parse(output[0].function.arguments) + return args.hasRemovedCodeErrors && args.changes.some( + change => change.hasChange && change.new.includes("class Greet:") + )