Skip to content

Commit cbb39fb

Browse files
madtibomusically-ut
authored andcommitted
Add foreign key support for users id and posts id (#8)
* add foreign key support for users id and posts id using the "--foreign-keys" switch WARNING: when using the foreign keys option, some entries in votes and postlinks might be updated to enforce data integrity * Do not force constraint validation by setting them as 'not valid' * log table name
1 parent d5aa5c2 commit cbb39fb

10 files changed

+170
-118
lines changed

load_into_pg.py

+134-117
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def _createCmdTuple(cursor, keys, templ, attribs, insertJson):
3030
"""Use the cursor to mogrify a tuple of data.
3131
The passed data in `attribs` is augmented with default data (NULLs) and the
3232
order of data in the tuple is the same as in the list of `keys`. The
33-
`cursor` is used toe mogrify the data and the `templ` is the template used
33+
`cursor` is used to mogrify the data and the `templ` is the template used
3434
for the mogrification.
3535
"""
3636
defs = _makeDefValues(keys)
@@ -45,17 +45,124 @@ def _createCmdTuple(cursor, keys, templ, attribs, insertJson):
4545
values_to_insert = cursor.mogrify(templ, defs)
4646
return cursor.mogrify(templ, defs)
4747

48-
def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPassword):
48+
def _getTableKeys(table):
49+
"""Return an array of the keys for a given table"""
50+
keys = None
51+
if table == 'Users':
52+
keys = [
53+
'Id'
54+
, 'Reputation'
55+
, 'CreationDate'
56+
, 'DisplayName'
57+
, 'LastAccessDate'
58+
, 'WebsiteUrl'
59+
, 'Location'
60+
, 'AboutMe'
61+
, 'Views'
62+
, 'UpVotes'
63+
, 'DownVotes'
64+
, 'ProfileImageUrl'
65+
, 'Age'
66+
, 'AccountId'
67+
]
68+
elif table == 'Badges':
69+
keys = [
70+
'Id'
71+
, 'UserId'
72+
, 'Name'
73+
, 'Date'
74+
]
75+
elif table == 'PostLinks':
76+
keys = [
77+
'Id'
78+
, 'CreationDate'
79+
, 'PostId'
80+
, 'RelatedPostId'
81+
, 'LinkTypeId'
82+
]
83+
elif table == 'Comments':
84+
keys = [
85+
'Id'
86+
, 'PostId'
87+
, 'Score'
88+
, 'Text'
89+
, 'CreationDate'
90+
, 'UserId'
91+
]
92+
elif table == 'Votes':
93+
keys = [
94+
'Id'
95+
, 'PostId'
96+
, 'VoteTypeId'
97+
, 'UserId'
98+
, 'CreationDate'
99+
, 'BountyAmount'
100+
]
101+
elif table == 'Posts':
102+
keys = [
103+
'Id'
104+
, 'PostTypeId'
105+
, 'AcceptedAnswerId'
106+
, 'ParentId'
107+
, 'CreationDate'
108+
, 'Score'
109+
, 'ViewCount'
110+
, 'Body'
111+
, 'OwnerUserId'
112+
, 'LastEditorUserId'
113+
, 'LastEditorDisplayName'
114+
, 'LastEditDate'
115+
, 'LastActivityDate'
116+
, 'Title'
117+
, 'Tags'
118+
, 'AnswerCount'
119+
, 'CommentCount'
120+
, 'FavoriteCount'
121+
, 'ClosedDate'
122+
, 'CommunityOwnedDate'
123+
]
124+
elif table == 'Tags':
125+
keys = [
126+
'Id'
127+
, 'TagName'
128+
, 'Count'
129+
, 'ExcerptPostId'
130+
, 'WikiPostId'
131+
]
132+
elif table == 'PostHistory':
133+
keys = [
134+
'Id',
135+
'PostHistoryTypeId',
136+
'PostId',
137+
'RevisionGUID',
138+
'CreationDate',
139+
'UserId',
140+
'Text'
141+
]
142+
elif table == 'Comments':
143+
keys = [
144+
'Id',
145+
'PostId',
146+
'Score',
147+
'Text',
148+
'CreationDate',
149+
'UserId',
150+
]
151+
return keys
152+
153+
def handleTable(table, insertJson, createFk, dbname, mbDbFile, mbHost, mbPort, mbUsername, mbPassword):
49154
"""Handle the table including the post/pre processing."""
155+
keys = _getTableKeys(table)
50156
dbFile = mbDbFile if mbDbFile is not None else table + '.xml'
51157
tmpl = _createMogrificationTemplate(table, keys, insertJson)
52158
start_time = time.time()
53159

54160
try:
55161
pre = open('./sql/' + table + '_pre.sql').read()
56162
post = open('./sql/' + table + '_post.sql').read()
163+
fk = open('./sql/' + table + '_fk.sql').read()
57164
except IOError as e:
58-
six.print_("Could not load pre/post sql. Are you running from the correct path?", file=sys.stderr)
165+
six.print_("Could not load pre/post/fk sql. Are you running from the correct path?", file=sys.stderr)
59166
sys.exit(-1)
60167

61168
dbConnectionParam = "dbname={}".format(dbname)
@@ -74,6 +181,7 @@ def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUse
74181
if mbPassword is not None:
75182
dbConnectionParam += ' password={}'.format(mbPassword)
76183

184+
77185
try:
78186
with pg.connect(dbConnectionParam) as conn:
79187
with conn.cursor() as cur:
@@ -95,13 +203,12 @@ def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUse
95203
for row_attribs in rows
96204
]
97205
)
98-
99206
if len(valuesStr) > 0:
100207
cmd = 'INSERT INTO ' + table + \
101208
' VALUES\n' + valuesStr + ';'
102209
cur.execute(cmd)
103210
conn.commit()
104-
six.print_('Table processing took {:.1f} seconds'.format(time.time() - start_time))
211+
six.print_('Table {0} processing took {1:.1f} seconds'.format(table, time.time() - start_time))
105212

106213
# Post-processing (creation of indexes)
107214
start_time = time.time()
@@ -110,6 +217,14 @@ def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUse
110217
cur.execute(post)
111218
conn.commit()
112219
six.print_('Post processing took {} seconds'.format(time.time() - start_time))
220+
if createFk:
221+
# fk-processing (creation of foreign keys)
222+
start_time = time.time()
223+
six.print_('fk processing ...')
224+
if post != '':
225+
cur.execute(fk)
226+
conn.commit()
227+
six.print_('fk processing took {} seconds'.format(time.time() - start_time))
113228

114229
except IOError as e:
115230
six.print_("Could not read from file {}.".format(dbFile), file=sys.stderr)
@@ -122,8 +237,6 @@ def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUse
122237
six.print_("Warning from the database.", file=sys.stderr)
123238
six.print_("pg.Warning: {0}".format(str(w)), file=sys.stderr)
124239

125-
126-
127240
#############################################################
128241

129242
parser = argparse.ArgumentParser()
@@ -173,127 +286,31 @@ def handleTable(table, keys, insertJson, dbname, mbDbFile, mbHost, mbPort, mbUse
173286
, action = 'store_true'
174287
, default = False
175288
)
176-
args = parser.parse_args()
177289

178-
table = args.table
179-
keys = None
180-
181-
if table == 'Users':
182-
keys = [
183-
'Id'
184-
, 'Reputation'
185-
, 'CreationDate'
186-
, 'DisplayName'
187-
, 'LastAccessDate'
188-
, 'WebsiteUrl'
189-
, 'Location'
190-
, 'AboutMe'
191-
, 'Views'
192-
, 'UpVotes'
193-
, 'DownVotes'
194-
, 'ProfileImageUrl'
195-
, 'Age'
196-
, 'AccountId'
197-
]
198-
elif table == 'Badges':
199-
keys = [
200-
'Id'
201-
, 'UserId'
202-
, 'Name'
203-
, 'Date'
204-
]
205-
elif table == 'PostLinks':
206-
keys = [
207-
'Id'
208-
, 'CreationDate'
209-
, 'PostId'
210-
, 'RelatedPostId'
211-
, 'LinkTypeId'
212-
]
213-
elif table == 'Comments':
214-
keys = [
215-
'Id'
216-
, 'PostId'
217-
, 'Score'
218-
, 'Text'
219-
, 'CreationDate'
220-
, 'UserId'
221-
]
222-
elif table == 'Votes':
223-
keys = [
224-
'Id'
225-
, 'PostId'
226-
, 'VoteTypeId'
227-
, 'UserId'
228-
, 'CreationDate'
229-
, 'BountyAmount'
230-
]
231-
elif table == 'Posts':
232-
keys = [
233-
'Id'
234-
, 'PostTypeId'
235-
, 'AcceptedAnswerId'
236-
, 'ParentId'
237-
, 'CreationDate'
238-
, 'Score'
239-
, 'ViewCount'
240-
, 'Body'
241-
, 'OwnerUserId'
242-
, 'LastEditorUserId'
243-
, 'LastEditorDisplayName'
244-
, 'LastEditDate'
245-
, 'LastActivityDate'
246-
, 'Title'
247-
, 'Tags'
248-
, 'AnswerCount'
249-
, 'CommentCount'
250-
, 'FavoriteCount'
251-
, 'ClosedDate'
252-
, 'CommunityOwnedDate'
253-
]
290+
parser.add_argument( '--foreign-keys'
291+
, help = 'Create foreign keys.'
292+
, action = 'store_true'
293+
, default = False
294+
)
254295

255-
# If the user has not explicitly asked for loading the body, we replace it with NULL
256-
if not args.with_post_body:
257-
specialRules[('Posts', 'Body')] = 'NULL'
296+
args = parser.parse_args()
258297

259-
elif table == 'Tags':
260-
keys = [
261-
'Id'
262-
, 'TagName'
263-
, 'Count'
264-
, 'ExcerptPostId'
265-
, 'WikiPostId'
266-
]
267-
elif table == 'PostHistory':
268-
keys = [
269-
'Id',
270-
'PostHistoryTypeId',
271-
'PostId',
272-
'RevisionGUID',
273-
'CreationDate',
274-
'UserId',
275-
'Text'
276-
]
277-
elif table == 'Comments':
278-
keys = [
279-
'Id',
280-
'PostId',
281-
'Score',
282-
'Text',
283-
'CreationDate',
284-
'UserId',
285-
]
298+
table = args.table
286299

287300
try:
288301
# Python 2/3 compatibility
289302
input = raw_input
290303
except NameError:
291304
pass
292305

293-
choice = input('This will drop the {} table. Are you sure [y/n]?'.format(table))
294306

307+
if table == 'Posts':
308+
# If the user has not explicitly asked for loading the body, we replace it with NULL
309+
if not args.with_post_body:
310+
specialRules[('Posts', 'Body')] = 'NULL'
311+
312+
choice = input('This will drop the {} table. Are you sure [y/n]? '.format(table))
295313
if len(choice) > 0 and choice[0].lower() == 'y':
296-
handleTable(table, keys, args.insert_json, args.dbname, args.file, args.host, args.port, args.username, args.password)
314+
handleTable(table, args.insert_json, args.foreign_keys, args.dbname, args.file, args.host, args.port, args.username, args.password)
297315
else:
298316
six.print_("Cancelled.")
299-

sql/Badges_fk.sql

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
ALTER TABLE badges ADD CONSTRAINT fk_badges_userid FOREIGN KEY (userid) REFERENCES users (id);

sql/Comments_fk.sql

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE Comments ADD CONSTRAINT fk_comments_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
ALTER TABLE Comments ADD CONSTRAINT fk_comments_postid FOREIGN KEY (postid) REFERENCES posts (id);

sql/Comments_post.sql

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ CREATE INDEX cmnts_postid_idx ON Comments USING hash (PostId)
66
CREATE INDEX cmnts_creation_date_idx ON Comments USING btree (CreationDate)
77
WITH (FILLFACTOR = 100);
88
CREATE INDEX cmnts_userid_idx ON Comments USING btree (UserId)
9-
WITH (FILLFACTOR = 100);
9+
WITH (FILLFACTOR = 100);

sql/PostHistory_fk.sql

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
ALTER TABLE Posthistory ADD CONSTRAINT fk_posthistory_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
ALTER TABLE Posthistory ADD CONSTRAINT fk_posthistory_postid FOREIGN KEY (postid) REFERENCES posts (id);

sql/PostLinks_fk.sql

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
-- impossible to enforce these constraints, set as 'not valid' to disable
2+
-- initial test.
3+
--
4+
-- These constaints can be forced running the following queries:
5+
-- ALTER TABLE postlinks ALTER postid DROP NOT NULL;
6+
-- UPDATE postlinks SET postid=NULL WHERE postid NOT IN (SELECT DISTINCT id FROM Posts);
7+
-- ALTER TABLE postlinks VALIDATE CONSTRAINT fk_postlinks_postid;
8+
-- ALTER TABLE postlinks ALTER relatedpostid DROP NOT NULL;
9+
-- UPDATE postlinks SET relatedpostid=NULL WHERE relatedpostid NOT IN (SELECT DISTINCT id FROM Posts);
10+
-- ALTER TABLE postlinks VALIDATE CONSTRAINT fk_postlinks_relatedpostid;
11+
--
12+
ALTER TABLE Postlinks ADD CONSTRAINT fk_postlinks_postid FOREIGN KEY (postid) REFERENCES posts (id) NOT VALID;
13+
ALTER TABLE Postlinks ADD CONSTRAINT fk_postlinks_relatedpostid FOREIGN KEY (relatedpostid) REFERENCES posts (id) NOT VALID;

sql/Posts_fk.sql

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_parentid FOREIGN KEY (parentid) REFERENCES posts (id);
2+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_owneruserid FOREIGN KEY (owneruserid) REFERENCES users (id);
3+
ALTER TABLE Posts ADD CONSTRAINT fk_posts_lasteditoruserid FOREIGN KEY (lasteditoruserid) REFERENCES users (id);

sql/Tags_fk.sql

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- dummy query
2+
SELECT 1;

sql/Users_fk.sql

+2
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
-- dummy query
2+
SELECT 1;

sql/Votes_fk.sql

+10
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
ALTER TABLE Votes ADD CONSTRAINT fk_votes_userid FOREIGN KEY (userid) REFERENCES users (id);
2+
-- impossible to enforce this constraint, set as 'not valid' to disable
3+
-- initial test.
4+
--
5+
-- This constaint can be forced running the following queries:
6+
-- ALTER TABLE votes ALTER PostId DROP NOT NULL;
7+
-- UPDATE votes SET postid=NULL WHERE postid NOT IN (SELECT DISTINCT id FROM Posts);
8+
-- ALTER TABLE votes VALIDATE CONSTRAINT fk_votes_postid;
9+
--
10+
ALTER TABLE Votes ADD CONSTRAINT fk_votes_postid FOREIGN KEY (postid) REFERENCES posts (id) NOT VALID;

0 commit comments

Comments
 (0)