Commit 1d5ea16

nateshim-indico and Nathanael Shim authored
[DEV-11456] make request chains wait appropriately for sync/async calls (#298)
* refactor wait logic
* edits
* formatting
* check for debouncer instance instead of float or int; update typing; add kwarg max_wait_time for all calls using wait
* fix random typo in CreateExport; add debouncer to rest of queries
* update typing tuple to union; update docstrings
* update timer logic
* address comments
* change max_wait_time to request_interval
* correct job docstring
* address comments
* address comments

---------

Co-authored-by: Nathanael Shim <[email protected]>
1 parent 74326f4 commit 1d5ea16

10 files changed, +148 -119 lines changed

examples/aio_client.py

Lines changed: 1 addition & 1 deletion
@@ -47,4 +47,4 @@ async def example_1(client):
 
 if __name__ == "__main__":
     # How to run a Python script using async
-    asyncio.run(example_with_client)
+    asyncio.run(example_with_client())
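
Worth spelling out why the added parentheses matter: asyncio.run() takes a coroutine object, so the coroutine function has to be called first. A self-contained sketch (the function body here is a stand-in, not the real example, which talks to the async Indico client):

import asyncio


async def example_with_client():
    # Stand-in body; the real example in examples/aio_client.py uses the async Indico client.
    await asyncio.sleep(0.1)
    return "done"


if __name__ == "__main__":
    # Fixed form: call the coroutine function, then hand the resulting coroutine to asyncio.run.
    print(asyncio.run(example_with_client()))
    # The pre-fix form, asyncio.run(example_with_client), passes the function object itself
    # and raises a ValueError because a coroutine was expected.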

indico/client/client.py

Lines changed: 14 additions & 8 deletions
@@ -1,17 +1,21 @@
 # -*- coding: utf-8 -*-
 
-from typing import Union, Optional
+import asyncio
+import time
+from typing import Optional, Union
+
 import urllib3
 
-from indico.config import IndicoConfig
-from indico.errors import IndicoError
-from indico.http.client import HTTPClient, AIOHTTPClient
 from indico.client.request import (
+    GraphQLRequest,
     HTTPRequest,
-    RequestChain,
     PagedRequest,
-    GraphQLRequest,
+    RequestChain,
 )
+from indico.config import IndicoConfig
+from indico.errors import IndicoError
+from indico.client.request import Delay
+from indico.http.client import AIOHTTPClient, HTTPClient
 
 
 class IndicoClient:
@@ -47,7 +51,8 @@ def _handle_request_chain(self, chain: RequestChain):
             elif isinstance(request, RequestChain):
                 response = self._handle_request_chain(request)
                 chain.previous = response
-
+            elif isinstance(request, Delay):
+                time.sleep(request.seconds)
         if chain.result:
             return chain.result
         return response
@@ -147,7 +152,8 @@ async def _handle_request_chain(self, chain: RequestChain):
             elif isinstance(request, RequestChain):
                 response = await self._handle_request_chain(request)
                 chain.previous = response
-
+            elif isinstance(request, Delay):
+                await asyncio.sleep(request.seconds)
         if chain.result:
             return chain.result
         return response
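
For reference, a minimal sketch of the dispatch the two clients now perform. The loop is abridged and the GraphQL execution is stubbed with a print, so it only illustrates how a yielded Delay is honored synchronously versus asynchronously; it is not the full client code.

import asyncio
import time


class Delay:
    # Mirrors the shape of indico.client.request.Delay introduced in this commit.
    def __init__(self, seconds=2):
        self.seconds = seconds


def handle_sync(requests):
    # Abridged version of the sync client's _handle_request_chain: a Delay blocks the thread.
    for request in requests:
        if isinstance(request, Delay):
            time.sleep(request.seconds)
        else:
            print("would execute", request)


async def handle_async(requests):
    # Abridged version of the async client's _handle_request_chain: a Delay yields to the event loop.
    for request in requests:
        if isinstance(request, Delay):
            await asyncio.sleep(request.seconds)
        else:
            print("would execute", request)


if __name__ == "__main__":
    handle_sync(["GetStatus", Delay(0.5), "GetStatus"])
    asyncio.run(handle_async(["GetStatus", Delay(0.5), "GetStatus"]))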

indico/client/request.py

Lines changed: 5 additions & 14 deletions
@@ -1,7 +1,7 @@
-from typing import Dict, Any
 from enum import Enum
+from typing import Any, Dict, Union
+
 from indico.errors import IndicoRequestError
-import time
 
 
 class HTTPMethod(Enum):
@@ -89,15 +89,6 @@ def requests(self):
         pass
 
 
-class Debouncer:
-    def __init__(self, max_timeout: int = 5):
-        self.timeout = 0
-        self.max_timeout = max_timeout or 5  # prevent None and 0
-
-    def backoff(self):
-        self.increment_timeout()
-        time.sleep(self.timeout)
-
-    def increment_timeout(self):
-        if self.timeout < self.max_timeout:
-            self.timeout += 1
+class Delay:
+    def __init__(self, seconds: Union[int, float] = 2):
+        self.seconds = seconds
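
The practical difference between the removed and added helpers: Debouncer slept inside the chain itself (always time.sleep, backing off 1s, 2s, ... up to max_timeout), while Delay is only a marker that the chain yields, so the executing client chooses time.sleep or asyncio.sleep and the interval is whatever the caller passed as request_interval. A side-by-side sketch, with Debouncer reproduced from the diff above for comparison:

import time
from typing import Union


class Debouncer:
    # Removed in this commit; reproduced from the diff above.
    def __init__(self, max_timeout: int = 5):
        self.timeout = 0
        self.max_timeout = max_timeout or 5  # prevent None and 0

    def backoff(self):
        self.increment_timeout()
        time.sleep(self.timeout)  # always blocks, even when called from an async client

    def increment_timeout(self):
        if self.timeout < self.max_timeout:
            self.timeout += 1


class Delay:
    # Added in this commit: a plain value with no sleeping of its own.
    def __init__(self, seconds: Union[int, float] = 2):
        self.seconds = seconds


if __name__ == "__main__":
    # Old: successive backoff() calls sleep 1s, 2s, ... capped at max_timeout seconds.
    # New: the chain yields Delay(seconds=...); the sync client calls time.sleep and the
    # async client awaits asyncio.sleep, so the event loop is never blocked.
    print(Delay(seconds=0.5).seconds)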

indico/queries/datasets.py

Lines changed: 30 additions & 24 deletions
@@ -1,32 +1,32 @@
 # -*- coding: utf-8 -*-
 
 import json
-import jsons
 import tempfile
 from pathlib import Path
-from typing import List, Union, Dict, Optional
+from typing import Dict, List, Optional, Union
 
-import pandas as pd
 import deprecation
+import jsons
+import pandas as pd
 
 from indico.client.request import (
-    Debouncer,
+    Delay,
     GraphQLRequest,
     HTTPMethod,
     HTTPRequest,
     PagedRequest,
     RequestChain,
 )
-from indico.errors import IndicoNotFound, IndicoInputError
+from indico.errors import IndicoInputError, IndicoNotFound
+from indico.filters import DatasetFilter
 from indico.queries.storage import UploadBatched, UploadImages
 from indico.types.dataset import (
     Dataset,
     OcrEngine,
+    OcrInputLanguage,
     OmnipageOcrOptionsInput,
     ReadApiOcrOptionsInput,
-    OcrInputLanguage,
 )
-from indico.filters import DatasetFilter
 
 
 class ListDatasets(PagedRequest):
@@ -196,12 +196,17 @@ class CreateDataset(RequestChain):
     Create a dataset and upload the associated files.
 
     Args:
-        name (str): Name of the dataset
-        files (List[str]): List of pathnames to the dataset files
-
-    Options:
-        dataset_type (str): Type of dataset to create [TEXT, DOCUMENT, IMAGE]
-        wait (bool, default=True): Wait for the dataset to upload and finish
+        name (str): Name of the dataset.
+        files (List[str]): List of path names to the dataset files.
+        wait (bool, optional): Wait for the dataset to upload and finish. Defaults to True.
+        dataset_type (str, optional): Type of dataset to create [TEXT, DOCUMENT, IMAGE]. Defaults to TEXT.
+        from_local_images (bool, optional): Flag whether files are local images or not. Defaults to False.
+        image_filename_col (str, optional): Image filename column. Defaults to 'filename'.
+        batch_size (int, optional): Size of file batch to upload at a time. Defaults to 20.
+        ocr_engine (OcrEngine, optional): Specify an OCR engine [OMNIPAGE, READAPI, READAPI_V2, READAPI_TABLES_V1]. Defaults to None.
+        omnipage_ocr_options (OmnipageOcrOptionsInput, optional): If using Omnipage, specify Omnipage OCR options. Defaults to None.
+        read_api_ocr_options: (ReadApiOcrOptionsInput, optional): If using ReadAPI, specify ReadAPI OCR options. Defaults to None.
+        request_interval (int or float, optional): The maximum time in between retry calls when waiting. Defaults to 5 seconds.
 
     Returns:
         Dataset object
@@ -222,6 +227,7 @@ def __init__(
         ocr_engine: OcrEngine = None,
         omnipage_ocr_options: OmnipageOcrOptionsInput = None,
         read_api_ocr_options: ReadApiOcrOptionsInput = None,
+        request_interval: Union[int, float] = 5,
     ):
         self.files = files
         self.name = name
@@ -233,6 +239,7 @@
         self.ocr_engine = ocr_engine
         self.omnipage_ocr_options = omnipage_ocr_options
        self.read_api_ocr_options = read_api_ocr_options
+        self.request_interval = request_interval
        if omnipage_ocr_options is not None and read_api_ocr_options is not None:
            raise IndicoInputError(
                "Must supply either omnipage or readapi options but not both."
@@ -278,13 +285,12 @@ def requests(self):
         )
         dataset_id = self.previous.id
         yield GetDatasetFileStatus(id=dataset_id)
-        debouncer = Debouncer()
         if self.wait is True:
             while not all(
                 [f.status in ["PROCESSED", "FAILED"] for f in self.previous.files]
             ):
                 yield GetDatasetFileStatus(id=dataset_id)
-                debouncer.backoff()
+                yield Delay(seconds=self.request_interval)
         yield GetDataset(id=dataset_id)
 
 
@@ -475,12 +481,11 @@ def requests(self):
         )
         yield GetDatasetFileStatus(id=self.dataset_id)
         if self.wait:
-            debouncer = Debouncer()
             while not all(
                 f.status in self.expected_statuses for f in self.previous.files
             ):
                 yield GetDatasetFileStatus(id=self.previous.id)
-                debouncer.backoff()
+                yield Delay()
 
 
 # Alias for backwards compatibility
@@ -538,9 +543,10 @@ class ProcessFiles(RequestChain):
     Process files associated with a dataset and add corresponding data to the dataset
 
     Args:
-        dataset_id (int): ID of the dataset
-        datafile_ids (List[str]): IDs of the datafiles to process
-        wait (bool): Block while polling for status of files
+        dataset_id (int): ID of the dataset.
+        datafile_ids (List[str]): IDs of the datafiles to process.
+        wait (bool, optional): Block while polling for status of files. Defaults to True.
+        request_interval (int or float, optional): The maximum time in between retry calls when waiting. Defaults to 5 seconds.
 
 
     Returns:
@@ -552,21 +558,22 @@ def __init__(
         dataset_id: int,
         datafile_ids: List[int],
         wait: bool = True,
+        request_interval: Union[int, float] = 5,
     ):
         self.dataset_id = dataset_id
         self.datafile_ids = datafile_ids
         self.wait = wait
+        self.request_interval = request_interval
 
     def requests(self):
         yield _ProcessFiles(self.dataset_id, self.datafile_ids)
-        debouncer = Debouncer()
         yield GetDatasetFileStatus(id=self.dataset_id)
         if self.wait:
             while not all(
                 f.status in ["PROCESSED", "FAILED"] for f in self.previous.files
             ):
                 yield GetDatasetFileStatus(id=self.dataset_id)
-                debouncer.backoff()
+                yield Delay(seconds=self.request_interval)
 
 
 @deprecation.deprecated(
@@ -593,14 +600,13 @@ def __init__(self, dataset_id: int, datafile_ids: List[int], wait: bool = True):
 
     def requests(self):
         yield _ProcessCSV(self.dataset_id, self.datafile_ids)
-        debouncer = Debouncer()
         yield GetDatasetFileStatus(id=self.dataset_id)
         if self.wait:
             while not all(
                 f.status in ["PROCESSED", "FAILED"] for f in self.previous.files
             ):
                 yield GetDatasetFileStatus(id=self.dataset_id)
-                debouncer.backoff()
+                yield Delay()
 
 
 class GetAvailableOcrEngines(GraphQLRequest):
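
A usage sketch of the new request_interval kwarg on CreateDataset; the host and file path are placeholders and the client setup is the library's usual IndicoClient/IndicoConfig pattern, so treat this as an illustration rather than a copy-paste recipe:

from indico import IndicoClient, IndicoConfig
from indico.queries import CreateDataset

# Placeholder config; substitute a real host and API token setup.
client = IndicoClient(config=IndicoConfig(host="app.indico.io"))

# With wait=True (the default) the chain re-polls GetDatasetFileStatus until every file
# is PROCESSED or FAILED, yielding Delay(seconds=request_interval) between polls.
dataset = client.call(
    CreateDataset(
        name="my-dataset",
        files=["./docs/sample.pdf"],
        wait=True,
        request_interval=10,  # poll every 10 seconds instead of the default 5
    )
)
print(dataset.id)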

indico/queries/export.py

Lines changed: 31 additions & 18 deletions
@@ -1,19 +1,21 @@
-import pandas as pd
 import io
+import warnings
 from typing import List, Union
 
-from indico.client import GraphQLRequest, RequestChain, Debouncer
+import pandas as pd
+
+from indico.client import Delay, GraphQLRequest, RequestChain
 from indico.errors import IndicoNotFound, IndicoRequestError
-from indico.types.export import LabelResolutionStrategy, Export
 from indico.queries.storage import RetrieveStorageObject
+from indico.types.export import Export, LabelResolutionStrategy
 
 
 class _CreateExport(GraphQLRequest):
     query = """
         mutation CreateExport(
             $datasetId: Int!,
             $labelsetId: Int!,
-            $columnIds: [Int],
+            $columnIds: [Int],
             $modelIds: [Int],
             $frozenLabelsetIds: [Int],
             $combineLabels: LabelResolutionStrategy,
@@ -55,7 +57,16 @@ def __init__(
         combine_labels: LabelResolutionStrategy = LabelResolutionStrategy.ALL.name,
         file_info: bool = None,
         anonymoous: bool = None,
+        anonymous: bool = None,
     ):
+        if anonymoous:
+            warnings.warn(
+                "Argument anonymoous is deprecated and will be removed in future versions. Use argument anonymous instead."
+            )
+            if anonymous:
+                raise IndicoRequestError("Cannot use both anonymoous and anonymous.")
+            else:
+                anonymous = anonymoous
         super().__init__(
             self.query,
             variables={
@@ -66,7 +77,7 @@ def __init__(
                 "frozenLabelsetIds": frozen_labelset_ids,
                 "combineLabels": combine_labels,
                 "fileInfo": file_info,
-                "anonymous": anonymoous,
+                "anonymous": anonymous,
             },
         )
 
@@ -93,7 +104,7 @@ class GetExport(GraphQLRequest):
             exports {
                 id
                 datasetId
-                name
+                name
                 status
                 columnIds
                 labelsetId
@@ -165,15 +176,16 @@ class CreateExport(RequestChain):
     Create an export job for a dataset.
 
     Args:
-        dataset_id (int): Dataset to create the export for
-        labelset_id (int): Labelset column id to export
-        column_ids (List(int)): Data column ids to export
-        model_ids (List(int)): Model ids to include predictions from
-        frozen_labelset_ids: (List(int)): frozen labelset ids to limit examples by
-        combine_labels (LabelResolutionStrategy): One row per example, combine labels from multiple labels into a single row
-        file_info (bool): Include datafile information
-        anonymous (bool): Anonymize user information
-        wait (bool): Wait for the export to complete. Default is True
+        dataset_id (int): Dataset to create the export for.
+        labelset_id (int): Labelset column id to export.
+        column_ids (List(int), optional): Data column ids to export. Defaults to None.
+        model_ids (List(int), optional): Model ids to include predictions from. Defaults to None.
+        frozen_labelset_ids: (List(int), optional): frozen labelset ids to limit examples by. Defaults to None.
+        combine_labels (LabelResolutionStrategy, optional): One row per example, combine labels from multiple labels into a single row. Defaults to 'all'.
+        file_info (bool, optional): Include datafile information. Defaults to False.
+        anonymous (bool, optional): Anonymize user information. Defaults to False.
+        wait (bool, optional): Wait for the export to complete. Defaults to True.
+        request_interval (int or float, optional): The maximum time in between retry calls when waiting. Defaults to 5 seconds.
 
     Returns:
         Export object
@@ -193,6 +205,7 @@ def __init__(
         file_info: bool = False,
         anonymous: bool = False,
         wait: bool = True,
+        request_interval: Union[int, float] = 5,
     ):
         self.dataset_id = dataset_id
         self.labelset_id = labelset_id
@@ -203,6 +216,7 @@ def __init__(
         self.file_info = file_info
         self.anonymous = anonymous
         self.wait = wait
+        self.request_interval = request_interval
         super().__init__()
 
     def requests(self):
@@ -214,12 +228,11 @@ def requests(self):
             frozen_labelset_ids=self.frozen_labelset_ids,
             combine_labels=self.combine_labels,
             file_info=self.file_info,
-            anonymoous=self.anonymous,
+            anonymous=self.anonymous,
         )
-        debouncer = Debouncer()
         if self.wait is True:
             while self.previous.status not in ["COMPLETE", "FAILED"]:
                 yield GetExport(self.previous.id)
-                debouncer.backoff()
+                yield Delay(seconds=self.request_interval)
 
         yield GetExport(self.previous.id)
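
And the analogous sketch for CreateExport, using the corrected anonymous spelling; the host and IDs are placeholders:

from indico import IndicoClient, IndicoConfig
from indico.queries import CreateExport

# Placeholder config and IDs; substitute real values.
client = IndicoClient(config=IndicoConfig(host="app.indico.io"))

export = client.call(
    CreateExport(
        dataset_id=123,
        labelset_id=456,
        file_info=True,
        anonymous=True,        # forwarded to the mutation as anonymous, not the old anonymoous typo
        wait=True,
        request_interval=2,    # poll GetExport every 2 seconds until COMPLETE or FAILED
    )
)
print(export.id, export.status)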
