@@ -170,7 +170,6 @@ def export_metrics(
170170 Returns:
171171 A DataFrame containing the exported metric data.
172172 """
173- import pandas as pd
174173
175174 response = self .request(
176175 " GET" ,
@@ -265,7 +264,6 @@ def export_parameters(
265264 Returns:
266265 A DataFrame containing the exported parameter data.
267266 """
268- import pandas as pd
269267
270268 response = self .request(
271269 " GET" ,
@@ -294,36 +292,48 @@ export_runs(
294292 filter : str | None = None ,
295293 status: StatusFilter = " completed" ,
296294 aggregations: list[MetricAggregationType] | None = None ,
297- ) -> pd.DataFrame
295+ format : ExportFormat = " parquet" ,
296+ base_dir: str | None = None ,
297+ ) -> str
298298```
299299
300- Exports run data for a specific project .
300+ Export runs using pagination - always writes to disk .
301301
302302** Parameters:**
303303
304304* ** ` project ` **
305305 (` str ` )
306- –The project identifier.
306+ –The project identifier
307307* ** ` filter ` **
308308 (` str | None ` , default:
309309 ` None `
310310 )
311- –A filter to apply to the exported data. Defaults to None.
311+ –A filter to apply to the exported data
312312* ** ` status ` **
313313 (` StatusFilter ` , default:
314314 ` 'completed' `
315315 )
316- –The status of runs to include. Defaults to "completed".
316+ –The status of runs to include
317317* ** ` aggregations ` **
318318 (` list[MetricAggregationType] | None ` , default:
319319 ` None `
320320 )
321- –A list of aggregation types to apply. Defaults to None.
321+ –A list of aggregation types to apply
322+ * ** ` format ` **
323+ (` ExportFormat ` , default:
324+ ` 'parquet' `
325+ )
326+ –Output format - "parquet", "csv", "json", "jsonl"
327+ * ** ` base_dir ` **
328+ (` str | None ` , default:
329+ ` None `
330+ )
331+ –Base directory for export (defaults to "./strikes-data")
322332
323333** Returns:**
324334
325- * ` DataFrame `
326- –A DataFrame containing the exported run data.
335+ * ** ` str ` ** ( ` str `
336+ ) –Path to the export directory
327337
328338<Accordion title = " Source code in dreadnode/api/client.py" icon = " code" >
329339``` python
@@ -332,35 +342,61 @@ def export_runs(
332342 project : str ,
333343 * ,
334344 filter : str | None = None ,
335- # format: ExportFormat = "parquet",
336345 status : StatusFilter = " completed" ,
337346 aggregations : list[MetricAggregationType] | None = None ,
338- ) -> " pd.DataFrame" :
347+ format : ExportFormat = " parquet" ,
348+ base_dir : str | None = None ,
349+ ) -> str :
339350 """
340- Exports run data for a specific project .
351+ Export runs using pagination - always writes to disk .
341352
342353 Args:
343- project: The project identifier.
344- filter: A filter to apply to the exported data. Defaults to None.
345- status: The status of runs to include. Defaults to "completed".
346- aggregations: A list of aggregation types to apply. Defaults to None.
354+ project: The project identifier
355+ filter: A filter to apply to the exported data
356+ status: The status of runs to include
357+ aggregations: A list of aggregation types to apply
358+ format: Output format - "parquet", "csv", "json", "jsonl"
359+ base_dir: Base directory for export (defaults to "./strikes-data")
347360
348361 Returns:
349- A DataFrame containing the exported run data.
362+ str: Path to the export directory
350363 """
351- import pandas as pd
352364
353- response = self .request(
365+ logger.info(f " Starting paginated export for project ' { project} ', format=' { format } ' " )
366+
367+ page = 1
368+ first_response = self .request(
354369 " GET" ,
355- f " /strikes/projects/ { project!s } /export " ,
370+ f " /strikes/projects/ { project!s } /export/paginated " ,
356371 params = {
357- " format " : " parquet " ,
372+ " page " : page ,
358373 " status" : status,
359374 ** ({" filter" : filter } if filter else {}),
360375 ** ({" aggregations" : aggregations} if aggregations else {}),
361376 },
362377 )
363- return pd.read_parquet(io.BytesIO(response.content))
378+
379+ if not first_response.content:
380+ logger.info(" No data found" )
381+
382+ first_chunk = pd.read_parquet(io.BytesIO(first_response.content))
383+
384+ total_runs = int (first_response.headers.get(" x-total" , " 0" ))
385+ has_more = first_response.headers.get(" x-has-more" , " false" ) == " true"
386+
387+ logger.info(f " Total runs: { total_runs} , Has more: { has_more} " )
388+
389+ logger.info(f " Writing { total_runs} runs to disk " )
390+ return self ._export_to_disk(
391+ project,
392+ first_chunk,
393+ dict (first_response.headers),
394+ filter ,
395+ status,
396+ aggregations,
397+ format ,
398+ str (base_dir) if base_dir else None ,
399+ )
364400```
365401
366402
0 commit comments