 logger = logging.getLogger(__name__)
 router = APIRouter(tags=["evaluation"])
 
+
 @router.post("/evaluation/upload-dataset")
 async def upload_dataset(
     dataset_name: str,
@@ -26,7 +27,7 @@ async def upload_dataset(
     The CSV file should have two columns: input and expected_output.
     Only the first 30 rows will be processed.
     """
-    if not file.filename.endswith('.csv'):
+    if not file.filename.endswith(".csv"):
         return APIResponse.failure_response(error="Only CSV files are supported")
 
     # Get Langfuse credentials
@@ -57,21 +58,25 @@ async def upload_dataset(
         # Read and validate CSV file
         contents = await file.read()
         logger.info(f"Read {len(contents)} bytes from file")
-
+
         # Decode contents and create CSV reader
-        csv_content = contents.decode('utf-8')
+        csv_content = contents.decode("utf-8")
         logger.info(f"CSV content preview: {csv_content[:200]}...")
-
+
         csv_file = io.StringIO(csv_content)
         reader = csv.DictReader(csv_file)
-
+
         # Validate headers
         if not reader.fieldnames:
-            return APIResponse.failure_response(error="CSV file is empty or has no headers")
-
+            return APIResponse.failure_response(
+                error="CSV file is empty or has no headers"
+            )
+
         logger.info(f"CSV headers found: {reader.fieldnames}")
-
-        if not all(header in reader.fieldnames for header in ['input', 'expected_output']):
+
+        if not all(
+            header in reader.fieldnames for header in ["input", "expected_output"]
+        ):
             return APIResponse.failure_response(
                 error="CSV must contain 'input' and 'expected_output' columns"
             )
@@ -82,36 +87,40 @@ async def upload_dataset(
             logger.info(f"Created dataset with ID: {dataset.id}")
         except Exception as e:
             logger.error(f"Error creating dataset: {str(e)}")
-            return APIResponse.failure_response(error=f"Failed to create dataset: {str(e)}")
+            return APIResponse.failure_response(
+                error=f"Failed to create dataset: {str(e)}"
+            )
 
         # Process rows (limited to 30)
         rows_processed = 0
         rows_data = []  # Store rows for logging
-
+
         for row in reader:
             if rows_processed >= 30:
                 break
 
             try:
                 # Log the row data
                 logger.info(f"Processing row {rows_processed + 1}: {row}")
-
+
                 # Create dataset item
                 item = langfuse.create_dataset_item(
                     dataset_name=dataset_name,
-                    input=row['input'],
-                    expected_output=row['expected_output']
+                    input=row["input"],
+                    expected_output=row["expected_output"],
                 )
                 logger.info(f"Created dataset item with ID: {item.id}")
-
+
                 rows_processed += 1
                 rows_data.append(row)
             except Exception as e:
                 logger.error(f"Error processing row {rows_processed + 1}: {str(e)}")
                 continue
 
         if rows_processed == 0:
-            return APIResponse.failure_response(error="No rows were successfully processed")
+            return APIResponse.failure_response(
+                error="No rows were successfully processed"
+            )
 
         # Log summary
         logger.info(f"Successfully processed {rows_processed} rows")
@@ -121,12 +130,12 @@ async def upload_dataset(
             data={
                 "message": f"Successfully uploaded {rows_processed} rows to dataset '{dataset_name}'",
                 "rows_processed": rows_processed,
-                "dataset_id": dataset.id if hasattr(dataset, 'id') else None
+                "dataset_id": dataset.id if hasattr(dataset, "id") else None,
             }
         )
 
     except Exception as e:
         logger.error(f"Error uploading dataset: {str(e)}")
         return APIResponse.failure_response(error=str(e))
     finally:
-        await file.close()
+        await file.close()
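For quick manual testing of the endpoint, a client call might look like the sketch below. It is an assumption-laden example, not part of this PR: it assumes the service runs locally on port 8000, that dataset_name arrives via the query string (which is what the bare `str` annotation on a FastAPI POST handler implies), and that the file name and dataset name are placeholders.

import requests

# Hypothetical eval.csv with the two required headers, e.g.:
#   input,expected_output
#   "What is 2+2?","4"
with open("eval.csv", "rb") as f:
    response = requests.post(
        "http://localhost:8000/evaluation/upload-dataset",  # assumed local URL
        params={"dataset_name": "my-eval-dataset"},  # placeholder dataset name
        files={"file": ("eval.csv", f, "text/csv")},
    )
print(response.json())  # APIResponse payload with rows_processed and dataset_id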