Skip to content

Commit 5ae3e32

Browse files
authored
Merge pull request #162 from roboflow/handle-upload-502
Make uploading large datasets more robust
2 parents bbf5fa5 + 9e0a5f3 commit 5ae3e32

File tree

4 files changed

+116
-92
lines changed

4 files changed

+116
-92
lines changed

roboflow/__init__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from roboflow.core.workspace import Workspace
1313
from roboflow.util.general import write_line
1414

15-
__version__ = "1.1.2"
15+
__version__ = "1.1.3"
1616

1717

1818
def check_key(api_key, model, notebook, num_retries=0):

roboflow/core/project.py

+95-90
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313

1414
from roboflow.config import API_URL, DEFAULT_BATCH_NAME, DEMO_KEYS
1515
from roboflow.core.version import Version
16+
from roboflow.util.general import retry
1617

1718
ACCEPTED_IMAGE_FORMATS = ["PNG", "JPEG"]
1819

@@ -25,6 +26,10 @@ def custom_formatwarning(msg, *args, **kwargs):
2526
warnings.formatwarning = custom_formatwarning
2627

2728

29+
class UploadError(Exception):
    """Raised when the server rejects an image or annotation upload."""
31+
32+
2833
class Project:
2934
def __init__(self, api_key, a_project, model_format=None):
3035
if api_key in DEMO_KEYS:
@@ -342,9 +347,30 @@ def __image_upload(
342347
)
343348
# Get response
344349
response = requests.post(upload_url)
345-
# Return response
346-
347-
return response
350+
responsejson = None
351+
try:
352+
responsejson = response.json()
353+
except:
354+
pass
355+
if response.status_code == 200:
356+
if responsejson:
357+
if "duplicate" in responsejson.keys():
358+
print(f"Duplicate image not uploaded: {image_path}")
359+
elif not responsejson.get("success"):
360+
raise UploadError(f"Server rejected image: {responsejson}")
361+
return responsejson.get("id")
362+
else:
363+
warnings.warn(
364+
f"upload image {image_path} 200 OK, weird response: {response}"
365+
)
366+
return None
367+
else:
368+
if responsejson:
369+
raise UploadError(
370+
f"Bad response: {response.status_code} - {responsejson}"
371+
)
372+
else:
373+
raise UploadError(f"Bad response: {response}")
348374

349375
def __annotation_upload(
350376
self, annotation_path: str, image_id: str, is_prediction: bool = False
@@ -378,10 +404,6 @@ def __annotation_upload(
378404
"result": "File not found or uploading to non-classification type project with invalid string"
379405
}
380406

381-
# Set annotation upload url
382-
383-
project_name = self.id.rsplit("/")[1]
384-
385407
self.annotation_upload_url = "".join(
386408
[
387409
API_URL + "/dataset/",
@@ -395,15 +417,48 @@ def __annotation_upload(
395417
]
396418
)
397419

398-
# Get annotation response
399-
annotation_response = requests.post(
420+
response = requests.post(
400421
self.annotation_upload_url,
401422
data=annotation_string,
402423
headers={"Content-Type": "text/plain"},
403424
)
404-
405-
# Return annotation response
406-
return annotation_response
425+
responsejson = None
426+
try:
427+
responsejson = response.json()
428+
except:
429+
pass
430+
if response.status_code == 200:
431+
if responsejson:
432+
if responsejson.get("error"):
433+
raise UploadError(
434+
f"Failed to save annotation for {image_id}: {responsejson['error']}"
435+
)
436+
elif not responsejson.get("success"):
437+
raise UploadError(
438+
f"Failed to save annotation for {image_id}: {responsejson}"
439+
)
440+
else:
441+
warnings.warn(
442+
f"save annotation {annotation_path} 200 OK, weird response: {response}"
443+
)
444+
elif response.status_code == 409 and "already annotated" in (
445+
responsejson or {}
446+
).get("error", {}).get("message"):
447+
print(f"image already annotated: {annotation_path}")
448+
else:
449+
if responsejson:
450+
if responsejson.get("error"):
451+
raise UploadError(
452+
f"save annotation for {image_id} / bad response: {response.status_code} - {responsejson['error']}"
453+
)
454+
else:
455+
raise UploadError(
456+
f"save annotation for {image_id} / bad response: {response.status_code} - {responsejson}"
457+
)
458+
else:
459+
raise UploadError(
460+
f"save annotation for {image_id} bad response: {response}"
461+
)
407462

408463
def check_valid_image(self, image_path):
409464
try:
@@ -420,7 +475,7 @@ def upload(
420475
image_path: str = None,
421476
annotation_path: str = None,
422477
hosted_image: bool = False,
423-
image_id: int = None,
478+
image_id: str = None,
424479
split: str = "train",
425480
num_retry_uploads: int = 0,
426481
batch_name: str = DEFAULT_BATCH_NAME,
@@ -434,7 +489,7 @@ def upload(
434489
image_path (str) - path to image you'd like to upload
435490
annotation_path (str) - if you're uploading an annotation, the path to it
436491
hosted_image (bool) - whether the image is hosted
437-
image_id (int) - id of the image
492+
image_id (str) - id of the image
438493
split (str) - to upload the image to
439494
num_retry_uploads (int) - how many times to retry upload on failure
440495
batch_name (str) - name of batch to upload to within project
@@ -519,90 +574,40 @@ def single_upload(
519574
):
520575
success = False
521576
annotation_success = False
522-
# User gives image path
523577
if image_path is not None:
524-
# Upload Image Response
525-
response = self.__image_upload(
526-
image_path,
527-
hosted_image=hosted_image,
528-
split=split,
529-
batch_name=batch_name,
530-
tag_names=tag_names,
531-
**kwargs,
532-
)
533-
# Get JSON response values
534578
try:
535-
if "duplicate" in response.json().keys():
536-
if response.json()["duplicate"]:
537-
success = True
538-
warnings.warn("Duplicate image not uploaded: " + image_path)
539-
else:
540-
success, image_id = (
541-
response.json()["success"],
542-
response.json()["id"],
543-
)
544-
545-
if not success:
546-
warnings.warn(f"Server rejected image: {response.json()}")
547-
548-
except Exception:
549-
# Image fails to upload
550-
warnings.warn(f"Bad response: {response}")
551-
success = False
552-
# Give user warning that image failed to upload
553-
if not success:
554-
warnings.warn(
555-
"Upload api failed with response: " + str(response.json())
579+
image_id = retry(
580+
num_retry_uploads,
581+
Exception,
582+
self.__image_upload,
583+
image_path,
584+
hosted_image=hosted_image,
585+
split=split,
586+
batch_name=batch_name,
587+
tag_names=tag_names,
588+
**kwargs,
556589
)
557-
if num_retry_uploads > 0:
558-
warnings.warn(
559-
"Image, "
560-
+ image_path
561-
+ ", failed to upload! Retrying for this many times: "
562-
+ str(num_retry_uploads)
563-
)
564-
self.single_upload(
565-
image_path=image_path,
566-
annotation_path=annotation_path,
567-
hosted_image=hosted_image,
568-
image_id=image_id,
569-
split=split,
570-
num_retry_uploads=num_retry_uploads - 1,
571-
**kwargs,
572-
)
573-
return
574-
else:
575-
warnings.warn(
576-
"Image, "
577-
+ image_path
578-
+ ", failed to upload! You can specify num_retry_uploads to retry a number of times."
579-
)
590+
success = True
591+
except BaseException as e:
592+
print(
593+
f"{image_path} ERROR uploading image after {num_retry_uploads} retries: {e}",
594+
file=sys.stderr,
595+
)
596+
return
580597

581598
# Upload only annotations to image based on image Id (no image)
582599
if annotation_path is not None and image_id is not None and success:
583600
# Get annotation upload response
584-
annotation_response = self.__annotation_upload(
585-
annotation_path, image_id, is_prediction=is_prediction
586-
)
587-
# Check if upload was a success
588601
try:
589-
response_data = annotation_response.json()
590-
if "success" in response_data.keys():
591-
annotation_success = True
592-
elif "error" in response_data.keys():
593-
warnings.warn(
594-
f"Uploading annotation data for image failed: {str(response_data['error'])}"
595-
)
596-
annotation_success = False
597-
else:
598-
warnings.warn(
599-
f"Uploading annotation data for image failed: {str(response_data)}"
600-
)
601-
annotation_success = False
602-
except:
603-
warnings.warn(f"Bad response: {response.status_code}")
604-
annotation_success = False
605-
602+
self.__annotation_upload(
603+
annotation_path, image_id, is_prediction=is_prediction
604+
)
605+
annotation_success = True
606+
except BaseException as e:
607+
print(
608+
f"{annotation_path} ERROR saving annotation: {e}", file=sys.stderr
609+
)
610+
return False
606611
# Give user warning that annotation failed to upload
607612
if not annotation_success:
608613
warnings.warn(

roboflow/core/workspace.py

+4-1
Original file line numberDiff line numberDiff line change
@@ -282,15 +282,17 @@ def upload_dataset(
282282

283283
dataset_path = dataset_path + "_voc"
284284

285-
if project_name in self.project_list:
285+
if project_name in [p["name"] for p in self.project_list]:
286286
dataset_upload_project = self.project(project_name)
287+
print(f"Uploading to existing project {dataset_upload_project.id}")
287288
else:
288289
dataset_upload_project = self.create_project(
289290
project_name,
290291
project_license=project_license,
291292
annotation=project_name,
292293
project_type=project_type,
293294
)
295+
print(f"Created project {dataset_upload_project.id}")
294296

295297
def upload_file(img_file, split):
296298
label_file = img_file.replace(".jpg", ".xml")
@@ -306,6 +308,7 @@ def parallel_upload(file_list, split):
306308
tqdm(
307309
executor.map(upload_file, file_list, [split] * len(file_list)),
308310
total=len(file_list),
311+
file=sys.stdout,
309312
)
310313
)
311314

roboflow/util/general.py

+16
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,19 @@ def write_line(line):
55
sys.stdout.write("\r" + line)
66
sys.stdout.write("\n")
77
sys.stdout.flush()
8+
9+
10+
def retry(max_retries, retry_on, func, *args, **kwargs):
    """Call ``func(*args, **kwargs)``, retrying when it raises a retryable error.

    Args:
        max_retries: how many extra attempts are allowed after the first
            call fails. Values below zero are treated as zero, so ``func``
            is always invoked at least once.
        retry_on: exception class, or tuple of exception classes, that
            triggers another attempt. A falsy value falls back to
            ``(Exception,)``.
        func: the callable to invoke.
        *args: positional arguments forwarded to ``func`` on every attempt.
        **kwargs: keyword arguments forwarded to ``func`` on every attempt.

    Returns:
        Whatever ``func`` returns on its first successful call.

    Raises:
        The last retryable exception once the retry budget is exhausted.
        Any exception that does not match ``retry_on`` propagates
        immediately without further attempts.
    """
    if not retry_on:
        retry_on = (Exception,)

    # Clamp the budget: with a negative value the loop would otherwise be
    # skipped entirely and the caller would get None without func ever running.
    budget = max(max_retries, 0)

    retries = 0
    while True:
        try:
            return func(*args, **kwargs)
        except retry_on:
            # Non-matching exceptions are not caught here and bubble up as-is.
            retries += 1
            if retries > budget:
                raise

0 commit comments

Comments
 (0)