Skip to content

Commit

Permalink
updated processing of exceptions to be a bit more flexible and added …
Browse files Browse the repository at this point in the history
…test cases
  • Loading branch information
aschroed committed Aug 23, 2023
1 parent cfd4ba7 commit 89dd17d
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 25 deletions.
39 changes: 25 additions & 14 deletions tests/test_import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -280,21 +280,32 @@ def test_combine_set_expsets_with_existing():


def test_error_report(connection_mock):
# There are 3 errors: 2 of them are legit, and one needs to be checked against the all-aliases list and excluded
err_dict = {"title": "Unprocessable Entity",
"status": "error",
"errors": [
{"name": "protocol_documents",
"description": "'dcic:insituhicagar' not found", "location": "body"},
{"name": "age",
"description": "'at' is not of type 'number'", "location": "body"},
{"name": "sex",
"description": "'green' is not one of ['male', 'female', 'unknown', 'mixed']", "location": "body"}],
"code": 422,
"@type": ["ValidationFailure", "Error"],
"description": "Failed validation"}
rep = imp.error_report(err_dict, "Vendor", ['dcic:insituhicagar'], connection_mock)
# There are 6 errors: 4 of them are legit, and 2 refer to aliases present in the all-aliases list, so they must be excluded
err_dict = {
"title": "Unprocessable Entity",
"status": "error",
"errors": [
{"name": "Schema: ", "location": "body",
"description": "Unable to resolve link: siyuan-wang-lab:region_1MB_TAD_1"},
{"name": "Schema: ", "location": "body",
"description": "Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2"},
{"location": "body", "name": "Schema: genome_location.1",
"description": "'siyuan-wang-lab:region_5MB_TAD_2' not found"},
{"name": "protocol_documents",
"description": "'dcic:insituhicagar' not found", "location": "body"},
{"name": "age",
"description": "'at' is not of type 'number'", "location": "body"},
{"name": "sex",
"description": "'green' is not one of ['male', 'female', 'unknown', 'mixed']", "location": "body"}
],
"code": 422,
"@type": ["ValidationFailure", "Error"],
"description": "Failed validation"
}
rep = imp.error_report(err_dict, "Vendor", ['dcic:insituhicagar', 'siyuan-wang-lab:region_1MB_TAD_1'], connection_mock)
message = '''
ERROR vendor Field 'Schema: ': Unable to resolve link: siyuan-wang-lab:region_5MB_TAD_2
ERROR vendor Field 'Schema: genome_location.1': 'siyuan-wang-lab:region_5MB_TAD_2' not found
ERROR vendor Field 'age': 'at' is not of type 'number'
ERROR vendor Field 'sex': 'green' is not one of ['male', 'female', 'unknown', 'mixed']
'''
Expand Down
28 changes: 17 additions & 11 deletions wranglertools/import_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -779,23 +779,29 @@ def error_report(error_dic, sheet, all_aliases, connection, error_id=''):
'title': 'Unprocessable Entity', 'description': 'Failed validation'}
report = []
if all(item in error_dic.items() for item in error_header.items()):
for err in error_dic['errors']:
error_description = err['description']
# if no field specified in the error, schema wide error
if not err['name']:
# deal with Validation errors
for err in error_dic.get('errors'):
error_description = err.get('description')
# this may no longer ever happen?
if 'name' not in err or not err.get('name'):
report.append("{sheet:<30}{des}"
.format(des=error_description, sheet="ERROR " + sheet.lower()))
else:
# field errors
not_found = None
# deal with errors about linked objects not in db - checking for those with
# aliases present in the workbook that should be ignored
utrl_txt = 'Unable to resolve link:'
if utrl_txt in error_description:
nf_txt = 'not found'
not_found = None
alias_bit = None
if error_id:
alias_bit = error_id
elif utrl_txt in error_description:
alias_bit = error_description.replace(utrl_txt, '')
elif error_description.endswith(nf_txt):
alias_bit = error_description.replace(nf_txt, '').replace("'", '')
if alias_bit:
not_found = alias_bit.strip()
elif error_description[-9:] == 'not found':
# if error is about object connections, check all aliases
# ignore ones about existing aliases
not_found = error_description[1:-11]
# ignore ones about existing aliases
if not_found and not_found in all_aliases:
continue
error_field = err['name']
Expand Down

0 comments on commit 89dd17d

Please sign in to comment.