Skip to content

Commit d5aa5c2

Browse files
authored Jan 21, 2019
Merge pull request #10 from madtibo/load_data_as_json
Add a '-j' switch to also store each loaded row's raw attributes in a jsonb field
2 parents 662df38 + 47759e7 commit d5aa5c2

9 files changed

+46
-24
lines changed
 

‎load_into_pg.py

+28-14
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import psycopg2 as pg
66
import row_processor as Processor
77
import six
8+
import json
89

910
# Special rules needed for certain tables (esp. for old database dumps)
1011
specialRules = {
@@ -15,17 +16,17 @@ def _makeDefValues(keys):
1516
"""Returns a dictionary containing None for all keys."""
1617
return dict(( (k, None) for k in keys ))
1718

18-
def _createMogrificationTemplate(table, keys):
19+
def _createMogrificationTemplate(table, keys, insertJson):
1920
"""Return the template string for mogrification for the given keys."""
20-
return ( '(' +
21-
', '.join( [ '%(' + k + ')s' if (table, k) not in specialRules else specialRules[table, k]
22-
for k in keys
23-
]
24-
) +
25-
')'
26-
)
27-
28-
def _createCmdTuple(cursor, keys, templ, attribs):
21+
table_keys = ', '.join( [ '%(' + k + ')s' if (table, k) not in specialRules
22+
else specialRules[table, k]
23+
for k in keys ])
24+
if insertJson:
25+
return ('(' + table_keys + ', %(jsonfield)s' + ')')
26+
else:
27+
return ('(' + table_keys + ')')
28+
29+
def _createCmdTuple(cursor, keys, templ, attribs, insertJson):
2930
"""Use the cursor to mogrify a tuple of data.
3031
The passed data in `attribs` is augmented with default data (NULLs) and the
3132
order of data in the tuple is the same as in the list of `keys`. The
@@ -34,12 +35,20 @@ def _createCmdTuple(cursor, keys, templ, attribs):
3435
"""
3536
defs = _makeDefValues(keys)
3637
defs.update(attribs)
38+
39+
if insertJson:
40+
dict_attribs = { }
41+
for name, value in attribs.items():
42+
dict_attribs[name] = value
43+
defs['jsonfield'] = json.dumps(dict_attribs)
44+
45+
values_to_insert = cursor.mogrify(templ, defs)
3746
return cursor.mogrify(templ, defs)
3847

39-
def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPassword):
48+
def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPassword):
4049
"""Handle the table including the post/pre processing."""
4150
dbFile = mbDbFile if mbDbFile is not None else table + '.xml'
42-
tmpl = _createMogrificationTemplate(table, keys)
51+
tmpl = _createMogrificationTemplate(table, keys, insertJson)
4352
start_time = time.time()
4453

4554
try:
@@ -82,7 +91,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
8291
six.print_('Processing data ...')
8392
for rows in Processor.batch(Processor.parse(xml), 500):
8493
valuesStr = ',\n'.join(
85-
[ _createCmdTuple(cur, keys, tmpl, row_attribs).decode('utf-8')
94+
[ _createCmdTuple(cur, keys, tmpl, row_attribs, insertJson).decode('utf-8')
8695
for row_attribs in rows
8796
]
8897
)
@@ -159,6 +168,11 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
159168
, default = False
160169
)
161170

171+
parser.add_argument( '-j', '--insert-json'
172+
, help = 'Insert raw data as JSON.'
173+
, action = 'store_true'
174+
, default = False
175+
)
162176
args = parser.parse_args()
163177

164178
table = args.table
@@ -279,7 +293,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
279293
choice = input('This will drop the {} table. Are you sure [y/n]?'.format(table))
280294

281295
if len(choice) > 0 and choice[0].lower() == 'y':
282-
handleTable(table, keys, args.dbname, args.file, args.host, args.port, args.username, args.password)
296+
handleTable(table, keys, args.insert_json, args.dbname, args.file, args.host, args.port, args.username, args.password)
283297
else:
284298
six.print_("Cancelled.")
285299

‎sql/Badges_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,6 @@ CREATE TABLE Badges (
33
Id int PRIMARY KEY ,
44
UserId int not NULL ,
55
Name text not NULL ,
6-
Date timestamp not NULL
6+
Date timestamp not NULL ,
7+
jsonfield jsonb
78
);

‎sql/Comments_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,6 @@ CREATE TABLE Comments (
55
Score int not NULL ,
66
Text text ,
77
CreationDate timestamp not NULL ,
8-
UserId int
8+
UserId int ,
9+
jsonfield jsonb
910
);

‎sql/PostHistory_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -6,5 +6,6 @@ CREATE TABLE PostHistory (
66
RevisionGUID text ,
77
CreationDate timestamp not NULL ,
88
UserId int ,
9-
PostText text
9+
PostText text ,
10+
jsonfield jsonb
1011
);

‎sql/PostLinks_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,6 @@ CREATE TABLE PostLinks (
44
CreationDate timestamp not NUll ,
55
PostId int not NULL ,
66
RelatedPostId int not NULL ,
7-
LinkTypeId int not Null
7+
LinkTypeId int not Null ,
8+
jsonfield jsonb
89
);

‎sql/Posts_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ CREATE TABLE Posts (
1919
CommentCount int ,
2020
FavoriteCount int ,
2121
ClosedDate timestamp ,
22-
CommunityOwnedDate timestamp
22+
CommunityOwnedDate timestamp ,
23+
jsonfield jsonb
2324
);
2425

‎sql/Tags_pre.sql

+4-3
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ DROP TABLE IF EXISTS Tags CASCADE;
22
CREATE TABLE Tags (
33
Id int PRIMARY KEY ,
44
TagName text not NULL ,
5-
Count int,
6-
ExcerptPostId int,
7-
WikiPostId int
5+
Count int ,
6+
ExcerptPostId int ,
7+
WikiPostId int ,
8+
jsonfield jsonb
89
);

‎sql/Users_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ CREATE TABLE Users (
1313
DownVotes int not NULL ,
1414
ProfileImageUrl text ,
1515
Age int ,
16-
AccountId int -- NULL accountId == deleted account?
16+
AccountId int , -- NULL accountId == deleted account?
17+
jsonfield jsonb
1718
);
1819

‎sql/Votes_pre.sql

+2-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ CREATE TABLE Votes (
55
VoteTypeId int not NULL ,
66
UserId int ,
77
CreationDate timestamp not NULL ,
8-
BountyAmount int
8+
BountyAmount int ,
9+
jsonfield jsonb
910
);
1011

0 commit comments

Comments
 (0)
Please sign in to comment.