@@ -1,6 +1,7 @@
 """Upload a batch of inferences to the Openlayer platform."""
 
 import time
+import logging
 import tempfile
 from typing import Optional
 
@@ -12,18 +13,18 @@
 from ... import Openlayer
 from ..._utils import maybe_transform
 from ...types.inference_pipelines import data_stream_params
-import asyncio
 
+log: logging.Logger = logging.getLogger(__name__)
 
-async def upload_batch_inferences_async(
+
+def upload_batch_inferences(
     client: Openlayer,
     inference_pipeline_id: str,
     config: data_stream_params.Config,
     dataset_df: Optional[pd.DataFrame] = None,
     dataset_path: Optional[str] = None,
     storage_type: Optional[StorageType] = None,
     merge: bool = False,
-    verbose: bool = False,
 ) -> None:
     """Uploads a batch of inferences to the Openlayer platform."""
     if dataset_df is None and dataset_path is None:
@@ -45,16 +46,9 @@ async def upload_batch_inferences_async(
         # writer
         if dataset_df is not None:
             temp_file_path = f"{tmp_dir}/dataset.arrow"
-            if verbose:
-                print("Converting DataFrame to pyarrow Table...")
             pa_table = pa.Table.from_pandas(dataset_df)
             pa_schema = pa_table.schema
 
-            if verbose:
-                print(
-                    "Writing Arrow Table using RecordBatchStreamWriter to "
-                    f"{temp_file_path}"
-                )
             with pa.ipc.RecordBatchStreamWriter(temp_file_path, pa_schema) as writer:
                 writer.write_table(pa_table, max_chunksize=16384)
         else:
@@ -64,14 +58,15 @@ async def upload_batch_inferences_async(
         # camelCase the config
         config = maybe_transform(config, data_stream_params.Config)
 
-        # Upload tarball to storage
-        if verbose:
-            print("Uploading dataset to storage via presigned URL...")
-        uploader.upload(
+        # Upload file to Openlayer storage
+        log.info("Uploading file to Openlayer")
+        response = uploader.upload(
             file_path=temp_file_path,
             object_name=object_name,
             presigned_url_response=presigned_url_response,
         )
+        if response.status_code != 200:
+            raise ValueError(f"Failed to upload file to storage: {response.text}")
 
         # Notify the backend
         client.post(
@@ -83,30 +78,7 @@ async def upload_batch_inferences_async(
                 "config": config,
             },
         )
-
-
-def upload_batch_inferences(
-    client: Openlayer,
-    inference_pipeline_id: str,
-    config: data_stream_params.Config,
-    dataset_df: Optional[pd.DataFrame] = None,
-    dataset_path: Optional[str] = None,
-    storage_type: Optional[StorageType] = None,
-    merge: bool = False,
-    verbose: bool = False,
-) -> None:
-    asyncio.run(
-        upload_batch_inferences_async(
-            client,
-            inference_pipeline_id,
-            config,
-            dataset_df,
-            dataset_path,
-            storage_type,
-            merge,
-            verbose,
-        )
-    )
+    log.info("Success! Uploaded batch inferences")
 
 
 def update_batch_inferences(
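For reference, below is a minimal usage sketch of the now-synchronous `upload_batch_inferences` entry point. This is an illustration under stated assumptions, not part of the diff: the `openlayer.lib.data` import path, the `Openlayer()` client reading `OPENLAYER_API_KEY` from the environment, and the config keys (`input_variable_names`, `output_column_name`) are assumptions.

import logging

import pandas as pd

from openlayer import Openlayer
from openlayer.lib.data import upload_batch_inferences  # assumed module path

# The verbose prints were replaced with module-level logging, so opt in to
# the progress messages via standard logging configuration.
logging.basicConfig(level=logging.INFO)

client = Openlayer()  # assumes OPENLAYER_API_KEY is set in the environment

df = pd.DataFrame(
    {
        "user_query": ["How do I upload a batch of inferences?"],
        "output": ["Call upload_batch_inferences with a DataFrame."],
    }
)

# No asyncio.run() wrapper is needed anymore: the call blocks until the
# Arrow file is written, uploaded to storage, and the backend is notified,
# and it raises ValueError if the upload returns a non-200 status.
upload_batch_inferences(
    client=client,
    inference_pipeline_id="YOUR_INFERENCE_PIPELINE_ID",
    dataset_df=df,
    config={
        "input_variable_names": ["user_query"],  # assumed config keys
        "output_column_name": "output",
    },
)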