Skip to content

Commit c949c7d

Browse files
committed
[script][m]: - script to modify invalid datapackage.json
1 parent 173ca4d commit c949c7d

File tree

2 files changed

+148
-0
lines changed

2 files changed

+148
-0
lines changed

examples/modify.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
import json
2+
3+
# This script modifies date type format to "any" in fields list.
4+
# Also, changes unsupported number types to "number"
5+
# Plus, converts top-level objects into lists of objects
6+
7+
class Modify(object):
    '''Repair a datapackage.json descriptor in place.

    Three fixes are applied by ``transform_package``:

    * every top-level value that is a JSON object is wrapped in a list;
    * fields of type ``date`` get their ``format`` set to ``"any"``;
    * fields whose type is one of ``UNSUPPORTED_NUMBER_TYPES`` become
      ``"number"``.
    '''

    # Field types that are rewritten to the generic "number" type.
    UNSUPPORTED_NUMBER_TYPES = ('decimal', 'double', 'float', 'binary')

    def __init__(self, path='datapackage.json'):
        '''Remember the descriptor location.

        :param path: path of the JSON file read by ``modify`` and ``show``.
        '''
        self.path = path

    def transform_package(self, data):
        '''Transform data

        The descriptor is modified in place and also returned.

        :param data: descriptor as a dict (e.g. the result of ``json.load``).
        :returns: the same dict, after the fixes described on the class.
        '''
        # Iterate over a snapshot of the keys; only values are reassigned.
        for key in list(data):
            if isinstance(data[key], dict):
                data[key] = [data[key]]

        # Resources without a schema, schemas that are not objects and
        # fields without a type are left untouched instead of raising.
        for resource in data.get('resources') or []:
            if not isinstance(resource, dict):
                continue
            schema = resource.get('schema')
            if not isinstance(schema, dict):
                continue
            for field in schema.get('fields') or []:
                if not isinstance(field, dict):
                    continue
                field_type = field.get('type')
                if field_type == 'date':
                    field['format'] = "any"
                elif field_type in self.UNSUPPORTED_NUMBER_TYPES:
                    field['type'] = "number"
        return data

    def _load(self):
        '''Read and parse the descriptor stored at ``self.path``.'''
        with open(self.path) as source:
            return json.load(source)

    def modify(self):
        '''Rewrites datapackage.json

        The file is fully read and transformed before it is reopened for
        writing, so a parse error leaves the original content untouched.
        '''
        transformed_data = self.transform_package(self._load())
        with open(self.path, 'w') as jsonfile:
            json.dump(transformed_data, jsonfile, indent=2)

    def show(self):
        '''See modified datapackage.json as a json string

        The result is printed to standard output; the file is not changed.
        '''
        transformed_data = self.transform_package(self._load())
        print(json.dumps(transformed_data, indent=2))
42+
43+
## ==============================================
44+
## CLI
45+
46+
import sys
47+
import optparse
48+
import inspect
49+
50+
def _object_methods(obj):
    '''Collect the public methods of a class or instance.

    :param obj: class or instance to inspect.
    :returns: dict mapping each method name that does not start with an
        underscore to the corresponding attribute.
    '''
    def _is_method(member):
        # On Python 3 a method looked up on the class is a plain function,
        # so ``inspect.ismethod`` alone would find nothing there.
        return inspect.ismethod(member) or inspect.isfunction(member)

    return dict(
        (name, member)
        for name, member in inspect.getmembers(obj, _is_method)
        if not name.startswith('_')
    )
55+
56+
def _main(functions_or_object):
    '''Run a minimal command-line interface.

    The first positional argument selects the action; the remaining
    positional arguments are passed to it as strings. When the action is
    missing or unknown, the help text is printed and the process exits
    with status 1.

    :param functions_or_object: a class whose public methods are the
        actions (a fresh instance is created with no arguments), or a
        namespace of functions (a dict such as ``locals()``, or a module).
    '''
    is_class = inspect.isclass(functions_or_object)
    if is_class:
        _methods = _object_methods(functions_or_object)
    else:
        _methods = _module_functions(functions_or_object)

    indent = '    '
    usage = '%prog {action}\nActions:\n' + indent
    # One line per action: its name and the first line of its docstring.
    usage += ('\n' + indent).join(
        '%s: %s' % (name, m.__doc__.split('\n')[0] if m.__doc__ else '')
        for (name, m) in sorted(_methods.items())
    )
    parser = optparse.OptionParser(usage)
    # Optional: for a config file
    # parser.add_option('-c', '--config', dest='config',
    #         help='Config file to use.')
    _options, args = parser.parse_args()

    if not args or args[0] not in _methods:
        parser.print_help()
        sys.exit(1)

    method = args[0]
    if is_class:
        getattr(functions_or_object(), method)(*args[1:])
    else:
        _methods[method](*args[1:])


def _module_functions(functions):
    '''Select the public plain functions from a namespace.

    :param functions: dict of names to objects, or a module.
    :returns: dict of the entries that are functions and whose name does
        not start with an underscore.
    '''
    namespace = functions if isinstance(functions, dict) else vars(functions)
    return dict(
        (name, value)
        for name, value in namespace.items()
        if inspect.isfunction(value) and not name.startswith('_')
    )
84+
85+
# When run as a script, expose the public methods of Modify as
# command-line actions.
if __name__ == '__main__':
    _main(Modify)

examples/modify_test.py

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
import unittest
2+
from modify import Modify
3+
4+
class HelpersTestCase(unittest.TestCase):
    '''Tests for ``Modify.transform_package``.'''

    def setUp(self):
        self.to_modify = Modify()

    def _transform_fields(self, fields):
        '''Transform a one-resource package and return its field list.

        :param fields: list of field descriptors placed in the schema.
        '''
        dp = {'resources': [{"schema": {"fields": fields}}]}
        datapackage = self.to_modify.transform_package(dp)
        return datapackage['resources'][0]['schema']['fields']

    def test_modify_date_format(self):
        fields = self._transform_fields([{"name": "test", "type": "date"}])
        self.assertEqual(fields[0]['format'], "any")

    def test_modify_date_format_works_with_multiple_date_fields(self):
        fields = self._transform_fields([
            {"name": "test", "type": "date"},
            {"name": "another-test", "type": "date", "format": 'YYYY'},
        ])
        self.assertEqual(fields[0]['format'], "any")
        self.assertEqual(fields[1]['format'], "any")

    def test_modify_date_format_works_if_format_is_wrong(self):
        fields = self._transform_fields(
            [{"name": "test", "type": "date", "format": 'YYYY'}]
        )
        self.assertEqual(fields[0]['format'], "any")

    def test_transform_works_if_number_type_is_wrong(self):
        # Cover every type the script treats as an unsupported number type.
        for wrong_type in ('decimal', 'double', 'float', 'binary'):
            fields = self._transform_fields(
                [{"name": "test", "type": wrong_type}]
            )
            self.assertEqual(fields[0]['type'], 'number', wrong_type)

    def test_turns_into_list_if_datapackage_keys_are_objects(self):
        dp = {
            'resources': {"schema": {"fields": [{"name": "test", "type": "decimal"}]}},
            'licenses': {"id": "test", "title": "test-license", "url": "test.com"},
            'sources': {'name': 'test', 'web': 'test.com'},
        }
        datapackage = self.to_modify.transform_package(dp)
        self.assertIsInstance(datapackage['resources'], list)
        self.assertIsInstance(datapackage['licenses'], list)
        self.assertIsInstance(datapackage['sources'], list)
        # The original object must survive as the single list element.
        self.assertEqual(
            datapackage['sources'], [{'name': 'test', 'web': 'test.com'}]
        )

0 commit comments

Comments
 (0)