Skip to content

Commit ab92755

Browse files
committed
Add foreign key support for references to users.id and posts.id, enabled with the "--foreign-keys" switch.
WARNING: when the foreign-keys option is used, post ids in votes and postlinks that reference non-existent posts are set to NULL so that the constraints can be enforced.
1 parent 662df38 commit ab92755

13 files changed

+158
-121
lines changed

load_into_pg.py

Lines changed: 131 additions & 115 deletions
Original file line numberDiff line numberDiff line change
@@ -29,24 +29,131 @@ def _createCmdTuple(cursor, keys, templ, attribs):
2929
"""Use the cursor to mogrify a tuple of data.
3030
The passed data in `attribs` is augmented with default data (NULLs) and the
3131
order of data in the tuple is the same as in the list of `keys`. The
32-
`cursor` is used toe mogrify the data and the `templ` is the template used
32+
`cursor` is used to mogrify the data and the `templ` is the template used
3333
for the mogrification.
3434
"""
3535
defs = _makeDefValues(keys)
3636
defs.update(attribs)
3737
return cursor.mogrify(templ, defs)
3838

39+
# Column keys of every supported table, indexed by table name.
# The order of each list is significant: _createCmdTuple emits the row data in
# the same order as the keys it is given.
_TABLE_KEYS = {
    'Users': [
        'Id',
        'Reputation',
        'CreationDate',
        'DisplayName',
        'LastAccessDate',
        'WebsiteUrl',
        'Location',
        'AboutMe',
        'Views',
        'UpVotes',
        'DownVotes',
        'ProfileImageUrl',
        'Age',
        'AccountId',
    ],
    'Badges': [
        'Id',
        'UserId',
        'Name',
        'Date',
    ],
    'PostLinks': [
        'Id',
        'CreationDate',
        'PostId',
        'RelatedPostId',
        'LinkTypeId',
    ],
    'Comments': [
        'Id',
        'PostId',
        'Score',
        'Text',
        'CreationDate',
        'UserId',
    ],
    'Votes': [
        'Id',
        'PostId',
        'VoteTypeId',
        'UserId',
        'CreationDate',
        'BountyAmount',
    ],
    'Posts': [
        'Id',
        'PostTypeId',
        'AcceptedAnswerId',
        'ParentId',
        'CreationDate',
        'Score',
        'ViewCount',
        'Body',
        'OwnerUserId',
        'LastEditorUserId',
        'LastEditorDisplayName',
        'LastEditDate',
        'LastActivityDate',
        'Title',
        'Tags',
        'AnswerCount',
        'CommentCount',
        'FavoriteCount',
        'ClosedDate',
        'CommunityOwnedDate',
    ],
    'Tags': [
        'Id',
        'TagName',
        'Count',
        'ExcerptPostId',
        'WikiPostId',
    ],
    'PostHistory': [
        'Id',
        'PostHistoryTypeId',
        'PostId',
        'RevisionGUID',
        'CreationDate',
        'UserId',
        'Text',
    ],
}


def _getTableKeys(table):
    """Return the list of column keys for the given table.

    `table` is the table name (e.g. 'Users' or 'Posts'). The result is a new
    list on every call, so the caller may modify it without affecting later
    calls. If the table is not known, None is returned.

    """
    keys = _TABLE_KEYS.get(table)
    if keys is None:
        return None
    # Hand out a copy so that the shared lookup table is never mutated.
    return list(keys)
143+
39144
def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPassword):
40145
"""Handle the table including the post/pre processing."""
146+
keys = _getTableKeys(table)
41147
dbFile = mbDbFile if mbDbFile is not None else table + '.xml'
42148
tmpl = _createMogrificationTemplate(table, keys)
43149
start_time = time.time()
44150

45151
try:
46152
pre = open('./sql/' + table + '_pre.sql').read()
47153
post = open('./sql/' + table + '_post.sql').read()
154+
fk = open('./sql/' + table + '_fk.sql').read()
48155
except IOError as e:
49-
six.print_("Could not load pre/post sql. Are you running from the correct path?", file=sys.stderr)
156+
six.print_("Could not load pre/post/fk sql. Are you running from the correct path?", file=sys.stderr)
50157
sys.exit(-1)
51158

52159
dbConnectionParam = "dbname={}".format(dbname)
@@ -65,6 +172,7 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
65172
if mbPassword is not None:
66173
dbConnectionParam += ' password={}'.format(mbPassword)
67174

175+
68176
try:
69177
with pg.connect(dbConnectionParam) as conn:
70178
with conn.cursor() as cur:
@@ -86,13 +194,12 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
86194
for row_attribs in rows
87195
]
88196
)
89-
90197
if len(valuesStr) > 0:
91198
cmd = 'INSERT INTO ' + table + \
92199
' VALUES\n' + valuesStr + ';'
93200
cur.execute(cmd)
94201
conn.commit()
95-
six.print_('Table processing took {:.1f} seconds'.format(time.time() - start_time))
202+
six.print_('Table processing took {1:.1f} seconds'.format(table, time.time() - start_time))
96203

97204
# Post-processing (creation of indexes)
98205
start_time = time.time()
@@ -101,6 +208,14 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
101208
cur.execute(post)
102209
conn.commit()
103210
six.print_('Post processing took {} seconds'.format(time.time() - start_time))
211+
if createFk:
212+
# fk-processing (creation of foreign keys)
213+
start_time = time.time()
214+
six.print_('fk processing ...')
215+
if post != '':
216+
cur.execute(fk)
217+
conn.commit()
218+
six.print_('fk processing took {} seconds'.format(time.time() - start_time))
104219

105220
except IOError as e:
106221
six.print_("Could not read from file {}.".format(dbFile), file=sys.stderr)
@@ -113,8 +228,6 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
113228
six.print_("Warning from the database.", file=sys.stderr)
114229
six.print_("pg.Warning: {0}".format(str(w)), file=sys.stderr)
115230

116-
117-
118231
#############################################################
119232

120233
parser = argparse.ArgumentParser()
@@ -159,127 +272,30 @@ def handleTable(table, keys, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPas
159272
, default = False
160273
)
161274

275+
parser.add_argument( '--foreign-keys'
276+
, help = 'Create foreign keys.'
277+
, action = 'store_true'
278+
, default = False
279+
)
280+
162281
args = parser.parse_args()
163282

164283
table = args.table
165-
keys = None
166-
167-
if table == 'Users':
168-
keys = [
169-
'Id'
170-
, 'Reputation'
171-
, 'CreationDate'
172-
, 'DisplayName'
173-
, 'LastAccessDate'
174-
, 'WebsiteUrl'
175-
, 'Location'
176-
, 'AboutMe'
177-
, 'Views'
178-
, 'UpVotes'
179-
, 'DownVotes'
180-
, 'ProfileImageUrl'
181-
, 'Age'
182-
, 'AccountId'
183-
]
184-
elif table == 'Badges':
185-
keys = [
186-
'Id'
187-
, 'UserId'
188-
, 'Name'
189-
, 'Date'
190-
]
191-
elif table == 'PostLinks':
192-
keys = [
193-
'Id'
194-
, 'CreationDate'
195-
, 'PostId'
196-
, 'RelatedPostId'
197-
, 'LinkTypeId'
198-
]
199-
elif table == 'Comments':
200-
keys = [
201-
'Id'
202-
, 'PostId'
203-
, 'Score'
204-
, 'Text'
205-
, 'CreationDate'
206-
, 'UserId'
207-
]
208-
elif table == 'Votes':
209-
keys = [
210-
'Id'
211-
, 'PostId'
212-
, 'VoteTypeId'
213-
, 'UserId'
214-
, 'CreationDate'
215-
, 'BountyAmount'
216-
]
217-
elif table == 'Posts':
218-
keys = [
219-
'Id'
220-
, 'PostTypeId'
221-
, 'AcceptedAnswerId'
222-
, 'ParentId'
223-
, 'CreationDate'
224-
, 'Score'
225-
, 'ViewCount'
226-
, 'Body'
227-
, 'OwnerUserId'
228-
, 'LastEditorUserId'
229-
, 'LastEditorDisplayName'
230-
, 'LastEditDate'
231-
, 'LastActivityDate'
232-
, 'Title'
233-
, 'Tags'
234-
, 'AnswerCount'
235-
, 'CommentCount'
236-
, 'FavoriteCount'
237-
, 'ClosedDate'
238-
, 'CommunityOwnedDate'
239-
]
240-
241-
# If the user has not explicitly asked for loading the body, we replace it with NULL
242-
if not args.with_post_body:
243-
specialRules[('Posts', 'Body')] = 'NULL'
244-
245-
elif table == 'Tags':
246-
keys = [
247-
'Id'
248-
, 'TagName'
249-
, 'Count'
250-
, 'ExcerptPostId'
251-
, 'WikiPostId'
252-
]
253-
elif table == 'PostHistory':
254-
keys = [
255-
'Id',
256-
'PostHistoryTypeId',
257-
'PostId',
258-
'RevisionGUID',
259-
'CreationDate',
260-
'UserId',
261-
'Text'
262-
]
263-
elif table == 'Comments':
264-
keys = [
265-
'Id',
266-
'PostId',
267-
'Score',
268-
'Text',
269-
'CreationDate',
270-
'UserId',
271-
]
272284

273285
try:
274286
# Python 2/3 compatibility
275287
input = raw_input
276288
except NameError:
277289
pass
278290

279-
choice = input('This will drop the {} table. Are you sure [y/n]?'.format(table))
280291

292+
if table == 'Posts':
293+
# If the user has not explicitly asked for loading the body, we replace it with NULL
294+
if not args.with_post_body:
295+
specialRules[('Posts', 'Body')] = 'NULL'
296+
297+
choice = input('This will drop the {} table. Are you sure [y/n]?'.format(table))
281298
if len(choice) > 0 and choice[0].lower() == 'y':
282299
handleTable(table, keys, args.dbname, args.file, args.host, args.port, args.username, args.password)
283300
else:
284301
six.print_("Cancelled.")
285-

sql/Badges_fk.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE badges ADD CONSTRAINT fk_badges_userid FOREIGN KEY (userid) REFERENCES users (id);

sql/Comments_fk.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE Comments ADD CONSTRAINT fk_comments_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
ALTER TABLE Comments ADD CONSTRAINT fk_comments_postid FOREIGN KEY (postid) REFERENCES posts (id);

sql/Comments_post.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ CREATE INDEX cmnts_postid_idx ON Comments USING hash (PostId)
66
CREATE INDEX cmnts_creation_date_idx ON Comments USING btree (CreationDate)
77
WITH (FILLFACTOR = 100);
88
CREATE INDEX cmnts_userid_idx ON Comments USING btree (UserId)
9-
WITH (FILLFACTOR = 100);
9+
WITH (FILLFACTOR = 100);

sql/PostHistory_fk.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE Posthistory ADD CONSTRAINT fk_posthistory_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
ALTER TABLE Posthistory ADD CONSTRAINT fk_posthistory_postid FOREIGN KEY (postid) REFERENCES posts (id);

sql/PostLinks_fk.sql

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
-- impossible to enforce so set NULL
2+
UPDATE Postlinks SET postid=NULL WHERE postid NOT IN (SELECT DISTINCT id FROM Posts);
3+
ALTER TABLE Postlinks ADD CONSTRAINT fk_postlinks_postid FOREIGN KEY (postid) REFERENCES posts (id);
4+
UPDATE Postlinks SET relatedpostid=NULL WHERE relatedpostid NOT IN (SELECT DISTINCT id FROM Posts);
5+
ALTER TABLE Postlinks ADD CONSTRAINT fk_postlinks_relatedpostid FOREIGN KEY (relatedpostid) REFERENCES posts (id);

sql/PostLinks_pre.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ DROP TABLE IF EXISTS PostLinks CASCADE;
22
CREATE TABLE PostLinks (
33
Id int PRIMARY KEY ,
44
CreationDate timestamp not NUll ,
5-
PostId int not NULL ,
6-
RelatedPostId int not NULL ,
5+
PostId int , -- not NULL ,
6+
RelatedPostId int , -- not NULL ,
77
LinkTypeId int not Null
88
);

sql/Posts_fk.sql

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_parentid FOREIGN KEY (parentid) REFERENCES posts (id);
2+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_owneruserid FOREIGN KEY (owneruserid) REFERENCES users (id);
3+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_lasteditoruserid FOREIGN KEY (lasteditoruserid) REFERENCES users (id);

sql/Tags_fk.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- dummy query
2+
SELECT 1;

sql/Tags_pre.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ DROP TABLE IF EXISTS Tags CASCADE;
22
CREATE TABLE Tags (
33
Id int PRIMARY KEY ,
44
TagName text not NULL ,
5-
Count int,
6-
ExcerptPostId int,
5+
Count int ,
6+
ExcerptPostId int ,
77
WikiPostId int
88
);

sql/Users_fk.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- dummy query
2+
SELECT 1;

sql/Votes_fk.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ALTER TABLE Votes ADD CONSTRAINT fk_votes_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
-- impossible to enforce so set NULL
3+
UPDATE Votes SET postid=NULL WHERE postid NOT IN (SELECT DISTINCT id FROM Posts);
4+
ALTER TABLE Votes ADD CONSTRAINT fk_votes_postid FOREIGN KEY (postid) REFERENCES posts (id);

sql/Votes_pre.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
DROP TABLE IF EXISTS Votes CASCADE;
22
CREATE TABLE Votes (
33
Id int PRIMARY KEY ,
4-
PostId int not NULL ,
4+
PostId int , -- not NULL ,
55
VoteTypeId int not NULL ,
66
UserId int ,
77
CreationDate timestamp not NULL ,

0 commit comments

Comments
 (0)