67
67
)
68
68
from databricks .sql .telemetry .latency_logger import log_latency
69
69
from databricks .sql .telemetry .models .enums import StatementType
70
+ from databricks .sql .common .http import DatabricksHttpClient , HttpMethod
70
71
71
72
logger = logging .getLogger (__name__ )
72
73
@@ -455,7 +456,6 @@ def __init__(
455
456
self .active_command_id = None
456
457
self .escaper = ParamEscaper ()
457
458
self .lastrowid = None
458
- self ._input_stream_data : Optional [BinaryIO ] = None
459
459
460
460
self .ASYNC_DEFAULT_POLLING_INTERVAL = 2
461
461
@@ -616,8 +616,29 @@ def _check_not_closed(self):
616
616
session_id_hex = self .connection .get_session_id_hex (),
617
617
)
618
618
619
def _validate_staging_http_response(
    self,
    response: requests.Response,
    operation_name: str = "staging operation",
) -> None:
    """Check the HTTP status of a staging request and fail on anything unexpected.

    Args:
        response: The HTTP response returned for the staging request.
        operation_name: Label inserted into the error and debug messages.

    Raises:
        OperationalError: If the status code is not 200, 201, 202 or 204.
    """
    accepted = requests.codes.accepted  # 202
    success_codes = (
        requests.codes.ok,  # 200
        requests.codes.created,  # 201
        requests.codes.no_content,  # 204
        accepted,
    )

    if response.status_code in success_codes:
        # A 202 is still a success, but it is worth a debug note because the
        # server has only accepted the request, not applied it yet.
        if response.status_code == accepted:
            logger.debug(
                "Response code %s from server indicates %s was accepted "
                "but not yet applied on the server. It's possible this command may fail later.",
                accepted,
                operation_name,
            )
        return

    raise OperationalError(
        f"{ operation_name } over HTTP was unsuccessful: { response .status_code } -{ response .text } ",
        session_id_hex=self.connection.get_session_id_hex(),
    )
639
+
619
640
def _handle_staging_operation (
620
- self , staging_allowed_local_path : Union [None , str , List [str ]]
641
+ self , staging_allowed_local_path : Union [None , str , List [str ]], input_stream : Optional [ BinaryIO ] = None
621
642
):
622
643
"""Fetch the HTTP request instruction from a staging ingestion command
623
644
and call the designated handler.
@@ -641,14 +662,9 @@ def _handle_staging_operation(
641
662
row .operation == "PUT"
642
663
and getattr (row , "localFile" , None ) == "__input_stream__"
643
664
):
644
- if not self ._input_stream_data :
645
- raise ProgrammingError (
646
- "No input stream provided for streaming operation" ,
647
- session_id_hex = self .connection .get_session_id_hex (),
648
- )
649
665
return self ._handle_staging_put_stream (
650
666
presigned_url = row .presignedUrl ,
651
- stream = self . _input_stream_data ,
667
+ stream = input_stream ,
652
668
headers = headers ,
653
669
)
654
670
@@ -696,7 +712,8 @@ def _handle_staging_operation(
696
712
}
697
713
698
714
logger .debug (
699
- f"Attempting staging operation indicated by server: { row .operation } - { getattr (row , 'localFile' , '' )} "
715
+ "Attempting staging operation indicated by server: %s - %s" ,
716
+ row .operation , getattr (row , 'localFile' , '' )
700
717
)
701
718
702
719
# TODO: Create a retry loop here to re-attempt if the request times out or fails
def _handle_staging_put_stream(
    self,
    presigned_url: str,
    stream: BinaryIO,
    headers: Optional[dict] = None,
) -> None:
    """Handle PUT operation with streaming data.

    Args:
        presigned_url: The presigned URL for upload
        stream: Binary stream to upload
        headers: Optional HTTP headers; when omitted, no extra headers are sent

    Raises:
        ProgrammingError: If no input stream is provided
        OperationalError: If the upload fails
    """
    if not stream:
        raise ProgrammingError(
            "No input stream provided for streaming operation",
            session_id_hex=self.connection.get_session_id_hex(),
        )

    # Build a fresh dict per call: a `{}` default in the signature would be a
    # single object shared by every call, and copying also keeps the caller's
    # dict from being handed directly to the HTTP client.
    request_headers = dict(headers) if headers else {}

    http_client = DatabricksHttpClient.get_instance()

    # Stream directly to presigned URL
    with http_client.execute(
        method=HttpMethod.PUT,
        url=presigned_url,
        data=stream,
        headers=request_headers,
        timeout=300,  # 5 minute timeout
    ) as response:
        self._validate_staging_http_response(response, "stream upload")
771
771
772
772
@log_latency (StatementType .SQL )
773
773
def _handle_staging_put (
@@ -787,27 +787,7 @@ def _handle_staging_put(
787
787
with open (local_file , "rb" ) as fh :
788
788
r = requests .put (url = presigned_url , data = fh , headers = headers )
789
789
790
- # fmt: off
791
- # Design borrowed from: https://stackoverflow.com/a/2342589/5093960
792
-
793
- OK = requests .codes .ok # 200
794
- CREATED = requests .codes .created # 201
795
- ACCEPTED = requests .codes .accepted # 202
796
- NO_CONTENT = requests .codes .no_content # 204
797
-
798
- # fmt: on
799
-
800
- if r .status_code not in [OK , CREATED , NO_CONTENT , ACCEPTED ]:
801
- raise OperationalError (
802
- f"Staging operation over HTTP was unsuccessful: { r .status_code } -{ r .text } " ,
803
- session_id_hex = self .connection .get_session_id_hex (),
804
- )
805
-
806
- if r .status_code == ACCEPTED :
807
- logger .debug (
808
- f"Response code { ACCEPTED } from server indicates ingestion command was accepted "
809
- + "but not yet applied on the server. It's possible this command may fail later."
810
- )
790
+ self ._validate_staging_http_response (r , "file upload" )
811
791
812
792
@log_latency (StatementType .SQL )
813
793
def _handle_staging_get (
@@ -856,8 +836,8 @@ def execute(
856
836
self ,
857
837
operation : str ,
858
838
parameters : Optional [TParameterCollection ] = None ,
859
- input_stream : Optional [BinaryIO ] = None ,
860
839
enforce_embedded_schema_correctness = False ,
840
+ input_stream : Optional [BinaryIO ] = None ,
861
841
) -> "Cursor" :
862
842
"""
863
843
Execute a query and wait for execution to complete.
@@ -894,62 +874,49 @@ def execute(
894
874
logger .debug (
895
875
"Cursor.execute(operation=%s, parameters=%s)" , operation , parameters
896
876
)
897
- try :
898
- # Store stream data if provided
899
- self ._input_stream_data = None
900
- if input_stream is not None :
901
- # Validate stream has required methods
902
- if not hasattr (input_stream , "read" ):
903
- raise TypeError (
904
- "input_stream must be a binary stream with read() method"
905
- )
906
- self ._input_stream_data = input_stream
877
+ param_approach = self ._determine_parameter_approach (parameters )
878
+ if param_approach == ParameterApproach .NONE :
879
+ prepared_params = NO_NATIVE_PARAMS
880
+ prepared_operation = operation
907
881
908
- param_approach = self ._determine_parameter_approach (parameters )
909
- if param_approach == ParameterApproach .NONE :
910
- prepared_params = NO_NATIVE_PARAMS
911
- prepared_operation = operation
882
+ elif param_approach == ParameterApproach .INLINE :
883
+ prepared_operation , prepared_params = self ._prepare_inline_parameters (
884
+ operation , parameters
885
+ )
886
+ elif param_approach == ParameterApproach .NATIVE :
887
+ normalized_parameters = self ._normalize_tparametercollection (parameters )
888
+ param_structure = self ._determine_parameter_structure (
889
+ normalized_parameters
890
+ )
891
+ transformed_operation = transform_paramstyle (
892
+ operation , normalized_parameters , param_structure
893
+ )
894
+ prepared_operation , prepared_params = self ._prepare_native_parameters (
895
+ transformed_operation , normalized_parameters , param_structure
896
+ )
912
897
913
- elif param_approach == ParameterApproach .INLINE :
914
- prepared_operation , prepared_params = self ._prepare_inline_parameters (
915
- operation , parameters
916
- )
917
- elif param_approach == ParameterApproach .NATIVE :
918
- normalized_parameters = self ._normalize_tparametercollection (parameters )
919
- param_structure = self ._determine_parameter_structure (
920
- normalized_parameters
921
- )
922
- transformed_operation = transform_paramstyle (
923
- operation , normalized_parameters , param_structure
924
- )
925
- prepared_operation , prepared_params = self ._prepare_native_parameters (
926
- transformed_operation , normalized_parameters , param_structure
927
- )
898
+ self ._check_not_closed ()
899
+ self ._close_and_clear_active_result_set ()
900
+ self .active_result_set = self .backend .execute_command (
901
+ operation = prepared_operation ,
902
+ session_id = self .connection .session .session_id ,
903
+ max_rows = self .arraysize ,
904
+ max_bytes = self .buffer_size_bytes ,
905
+ lz4_compression = self .connection .lz4_compression ,
906
+ cursor = self ,
907
+ use_cloud_fetch = self .connection .use_cloud_fetch ,
908
+ parameters = prepared_params ,
909
+ async_op = False ,
910
+ enforce_embedded_schema_correctness = enforce_embedded_schema_correctness ,
911
+ )
928
912
929
- self ._check_not_closed ()
930
- self ._close_and_clear_active_result_set ()
931
- self .active_result_set = self .backend .execute_command (
932
- operation = prepared_operation ,
933
- session_id = self .connection .session .session_id ,
934
- max_rows = self .arraysize ,
935
- max_bytes = self .buffer_size_bytes ,
936
- lz4_compression = self .connection .lz4_compression ,
937
- cursor = self ,
938
- use_cloud_fetch = self .connection .use_cloud_fetch ,
939
- parameters = prepared_params ,
940
- async_op = False ,
941
- enforce_embedded_schema_correctness = enforce_embedded_schema_correctness ,
913
+ if self .active_result_set and self .active_result_set .is_staging_operation :
914
+ self ._handle_staging_operation (
915
+ staging_allowed_local_path = self .connection .staging_allowed_local_path ,
916
+ input_stream = input_stream
942
917
)
943
918
944
- if self .active_result_set and self .active_result_set .is_staging_operation :
945
- self ._handle_staging_operation (
946
- staging_allowed_local_path = self .connection .staging_allowed_local_path
947
- )
948
-
949
- return self
950
- finally :
951
- # Clean up stream data
952
- self ._input_stream_data = None
919
+ return self
953
920
954
921
@log_latency (StatementType .QUERY )
955
922
def execute_async (
0 commit comments