Skip to content

Commit 96ac8ce

Browse files
committed
test scripts
1 parent 3587822 commit 96ac8ce

File tree

6 files changed

+538
-25
lines changed

6 files changed

+538
-25
lines changed

google/cloud/storage/_experimental/asyncio/async_multi_range_downloader.py

Lines changed: 122 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,12 @@
1515
from __future__ import annotations
1616
import asyncio
1717
import google_crc32c
18+
import grpc
1819
from google.api_core import exceptions
1920
from google.api_core.retry_async import AsyncRetry
21+
from google.cloud._storage_v2.types.storage import BidiReadObjectRedirectedError
22+
from google.rpc import status_pb2
23+
from google.protobuf.any_pb2 import Any as AnyProto
2024

2125
from typing import List, Optional, Tuple, Any, Dict
2226

@@ -40,7 +44,57 @@
4044

4145

4246
_MAX_READ_RANGES_PER_BIDI_READ_REQUEST = 100
43-
47+
_BIDI_READ_REDIRECTED_TYPE_URL = "type.googleapis.com/google.storage.v2.BidiReadObjectRedirectedError"
48+
49+
50+
def _is_read_retryable(exc):
    """Return True when a failed read operation should be retried.

    An error is considered retryable when it is one of:

    * a ``ServiceUnavailable``, ``DeadlineExceeded`` or ``TooManyRequests``
      API exception;
    * a ``GoogleAPICallError`` whose first wrapped error is a
      ``grpc.aio.AioRpcError`` with status ``UNAVAILABLE``, ``INTERNAL``,
      ``DEADLINE_EXCEEDED`` or ``RESOURCE_EXHAUSTED``;
    * such a wrapped error with status ``ABORTED`` whose
      ``grpc-status-details-bin`` trailer contains a detail with the
      ``BidiReadObjectRedirectedError`` type URL (i.e. a redirect).

    Args:
        exc: The exception raised by the read attempt.

    Returns:
        bool: ``True`` if the operation should be retried, else ``False``.
    """
    if isinstance(
        exc,
        (
            exceptions.ServiceUnavailable,
            exceptions.DeadlineExceeded,
            exceptions.TooManyRequests,
        ),
    ):
        return True

    # Everything below inspects the raw gRPC error wrapped by api_core.
    if not (isinstance(exc, exceptions.GoogleAPICallError) and exc.errors):
        return False
    grpc_error = exc.errors[0]
    if not isinstance(grpc_error, grpc.aio.AioRpcError):
        return False

    code = grpc_error.code()
    if code in (
        grpc.StatusCode.UNAVAILABLE,
        grpc.StatusCode.INTERNAL,
        grpc.StatusCode.DEADLINE_EXCEEDED,
        grpc.StatusCode.RESOURCE_EXHAUSTED,
    ):
        return True
    if code != grpc.StatusCode.ABORTED:
        return False

    # ABORTED is only retryable when it is a redirect; the redirect is
    # reported through the binary status-details trailer.
    status_details_bin = next(
        (
            value
            for key, value in (grpc_error.trailing_metadata() or ())
            if key == "grpc-status-details-bin"
        ),
        None,
    )
    if not status_details_bin:
        return False

    status_proto = status_pb2.Status()
    try:
        status_proto.ParseFromString(status_details_bin)
    except Exception:
        # Deliberate best effort: an undecodable trailer cannot be proven to
        # be a redirect, so report "not retryable" and let the original
        # error propagate to the caller instead of failing the predicate.
        return False

    return any(
        detail.type_url == _BIDI_READ_REDIRECTED_TYPE_URL
        for detail in status_proto.details
    )
4498

4599
class AsyncMultiRangeDownloader:
46100
"""Provides an interface for downloading multiple ranges of a GCS ``Object``
@@ -158,43 +212,95 @@ def __init__(
158212
self.read_handle = read_handle
159213
self.read_obj_str: Optional[_AsyncReadObjectStream] = None
160214
self._is_stream_open: bool = False
215+
self._routing_token: Optional[str] = None
216+
217+
async def _on_open_error(self, exc):
    """Prepare this downloader for another ``open`` attempt after a failure.

    If ``exc`` wraps a ``grpc.aio.AioRpcError`` with status ``ABORTED`` whose
    ``grpc-status-details-bin`` trailer carries a
    ``BidiReadObjectRedirectedError``, the routing token and read handle from
    that redirect are stored on ``self`` so the next attempt can use them.
    Whatever the error was, a stream that reports itself open is closed and
    ``self._is_stream_open`` is reset.

    Args:
        exc: The exception raised by the failed open attempt.
    """
    import logging

    logger = logging.getLogger(__name__)

    grpc_error = None
    if isinstance(exc, exceptions.GoogleAPICallError) and exc.errors:
        if isinstance(exc.errors[0], grpc.aio.AioRpcError):
            grpc_error = exc.errors[0]

    if grpc_error is not None and grpc_error.code() == grpc.StatusCode.ABORTED:
        status_details_bin = None
        # Missing trailers must not skip the stream cleanup below, so an
        # empty/None result is iterated as an empty sequence, not returned on.
        for key, value in grpc_error.trailing_metadata() or ():
            if key == "grpc-status-details-bin":
                status_details_bin = value
                break

        if status_details_bin:
            try:
                status_proto = status_pb2.Status()
                status_proto.ParseFromString(status_details_bin)
                for detail in status_proto.details:
                    if detail.type_url != _BIDI_READ_REDIRECTED_TYPE_URL:
                        continue
                    # The generated type is a proto-plus wrapper, which
                    # ``Any.Unpack`` cannot populate; decode the packed
                    # payload through the wrapper's own deserializer.
                    redirect = BidiReadObjectRedirectedError.deserialize(
                        detail.value
                    )
                    if redirect.routing_token:
                        self._routing_token = redirect.routing_token
                    if redirect.read_handle and redirect.read_handle.handle:
                        self.read_handle = redirect.read_handle.handle
                    break
            except Exception:
                # Best effort: a malformed redirect payload must not mask the
                # original error; the retry proceeds without a routing token.
                logger.warning(
                    "Could not decode redirect details from ABORTED error",
                    exc_info=True,
                )

    stream = self.read_obj_str
    # The stream class tracks its state in ``_is_stream_open``.
    if stream is not None and getattr(stream, "_is_stream_open", False):
        try:
            await stream.close()
        except Exception:
            logger.debug("Ignoring error while closing failed stream", exc_info=True)
    self._is_stream_open = False

async def open(self, retry_policy: Optional[AsyncRetry] = None) -> None:
    """Opens the bidi-gRPC connection to read from the object.

    This method initializes and opens an `_AsyncReadObjectStream` (bidi-gRPC
    stream) for downloading ranges of data from a GCS ``Object``.

    "Opening" constitutes fetching object metadata such as generation number
    and read handle and sets them as attributes if not already set. When a
    previous attempt was redirected, the stored routing token is sent with
    the next attempt as ``x-goog-request-params`` metadata.

    Args:
        retry_policy: Optional retry configuration. Its predicate is replaced
            with ``_is_read_retryable``; any ``on_error`` callback it carries
            is left in place and invoked by the retry machinery as usual.

    Raises:
        ValueError: If the underlying stream is already open.
    """
    if self._is_stream_open:
        raise ValueError("Underlying bidi-gRPC stream is already open")

    if retry_policy is None:
        retry_policy = AsyncRetry(predicate=_is_read_retryable)
    else:
        retry_policy = retry_policy.with_predicate(_is_read_retryable)

    async def _do_open():
        self.read_obj_str = _AsyncReadObjectStream(
            client=self.client,
            bucket_name=self.bucket_name,
            object_name=self.object_name,
            generation_number=self.generation_number,
            read_handle=self.read_handle,
        )

        metadata = None
        if self._routing_token:
            metadata = [
                ("x-goog-request-params", f"routing_token={self._routing_token}")
            ]
            # A routing token is only valid for the attempt it was issued for.
            self._routing_token = None

        try:
            await self.read_obj_str.open(metadata=metadata)
        except Exception as exc:
            # AsyncRetry calls ``on_error`` synchronously, so a coroutine
            # callback would never be awaited. Run the async recovery here,
            # then re-raise so the retry policy decides whether to retry.
            await self._on_open_error(exc)
            raise

        if self.read_obj_str.generation_number:
            self.generation_number = self.read_obj_str.generation_number
        if self.read_obj_str.read_handle:
            self.read_handle = self.read_obj_str.read_handle

        self._is_stream_open = True

    await retry_policy(_do_open)()
199305

200306
async def download_ranges(
@@ -259,9 +365,7 @@ async def download_ranges(
259365
lock = asyncio.Lock()
260366

261367
if retry_policy is None:
262-
retry_policy = AsyncRetry(
263-
predicate=lambda e: isinstance(e, (exceptions.ServiceUnavailable, exceptions.DeadlineExceeded))
264-
)
368+
retry_policy = AsyncRetry(predicate=_is_read_retryable)
265369

266370
# Initialize Global State for Retry Strategy
267371
download_states = {}

google/cloud/storage/_experimental/asyncio/async_read_object_stream.py

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
2323
"""
2424

25-
from typing import Optional
25+
from typing import List, Optional, Tuple
2626
from google.cloud import _storage_v2
2727
from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
2828
from google.cloud.storage._experimental.asyncio.async_abstract_object_stream import (
@@ -93,23 +93,54 @@ def __init__(
9393
self.socket_like_rpc: Optional[AsyncBidiRpc] = None
9494
self._is_stream_open: bool = False
9595

96-
async def open(self) -> None:
96+
async def open(self, metadata: Optional[List[Tuple[str, str]]] = None) -> None:
9797
"""Opens the bidi-gRPC connection to read from the object.
9898
9999
This method sends an initial request to start the stream and receives
100100
the first response containing metadata and a read handle.
101+
102+
Args:
103+
metadata (Optional[List[Tuple[str, str]]]): Additional metadata
104+
to send with the initial stream request, e.g., for routing tokens.
101105
"""
102106
if self._is_stream_open:
103107
raise ValueError("Stream is already open")
108+
109+
read_object_spec = _storage_v2.BidiReadObjectSpec(
110+
bucket=self._full_bucket_name,
111+
object=self.object_name,
112+
generation=self.generation_number if self.generation_number else None,
113+
read_handle=self.read_handle if self.read_handle else None,
114+
)
115+
initial_request = _storage_v2.BidiReadObjectRequest(
116+
read_object_spec=read_object_spec
117+
)
118+
119+
# Build the x-goog-request-params header
120+
request_params = [f"bucket={self._full_bucket_name}"]
121+
other_metadata = []
122+
if metadata:
123+
for key, value in metadata:
124+
if key == "x-goog-request-params":
125+
request_params.append(value)
126+
else:
127+
other_metadata.append((key, value))
128+
129+
current_metadata = other_metadata
130+
current_metadata.append(("x-goog-request-params", ",".join(request_params)))
131+
104132
self.socket_like_rpc = AsyncBidiRpc(
105-
self.rpc, initial_request=self.first_bidi_read_req, metadata=self.metadata
133+
self.rpc, initial_request=initial_request, metadata=current_metadata
106134
)
107-
await self.socket_like_rpc.open() # this is actually 1 send
135+
await self.socket_like_rpc.open()
108136
response = await self.socket_like_rpc.recv()
109-
if self.generation_number is None:
110-
self.generation_number = response.metadata.generation
111137

112-
self.read_handle = response.read_handle
138+
if response and response.metadata:
139+
if self.generation_number is None and response.metadata.generation:
140+
self.generation_number = response.metadata.generation
141+
142+
if response and response.read_handle and response.read_handle.handle:
143+
self.read_handle = response.read_handle.handle
113144

114145
self._is_stream_open = True
115146

google/cloud/storage/_experimental/asyncio/retry/reads_resumption_strategy.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -140,3 +140,6 @@ async def recover_state_on_failure(self, error: Exception, state: Any) -> None:
140140
cause = getattr(error, "cause", error)
141141
if isinstance(cause, BidiReadObjectRedirectedError):
142142
state["routing_token"] = cause.routing_token
143+
if cause.read_handle and cause.read_handle.handle:
144+
state["read_handle"] = cause.read_handle.handle
145+
print(f"Recover state: Updated read_handle from redirect: {state['read_handle']}")

0 commit comments

Comments
 (0)