-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathwikifile.py
265 lines (241 loc) · 8.17 KB
/
wikifile.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# -*- coding: utf-8 -*-
# Copyright 2009 Mr.Z-man
# This file is part of wikitools.
# wikitools is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# wikitools is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with wikitools. If not, see <http://www.gnu.org/licenses/>.
import wiki
import page
import api
import urllib2
class FileDimensionError(wiki.WikiError):
    """Error for an invalid combination of image dimensions."""
    pass
class UploadError(wiki.WikiError):
    """Error signalled when a file upload cannot be performed."""
    pass
class File(page.Page):
    """A file on the wiki (a page in namespace 6)."""

    def __init__(self, wiki, title, check=True, followRedir=False, section=False, sectionnumber=False, pageid=False):
        """
        wiki - A wiki object
        title - The page title, as a string or unicode object
        check - Checks for existence, normalizes title, required for most things
        followRedir - follow redirects (check must be true)
        section - the section name
        sectionnumber - the section number
        pageid - pageid, can be in place of title
        """
        page.Page.__init__(self, wiki, title, check, followRedir, section, sectionnumber, pageid=pageid)
        # Files always live in namespace 6; coerce anything else.
        if self.namespace != 6:
            self.setNamespace(6, check)
        # Lazily filled caches, see getUsage()/getUsageGen() and getHistory().
        self.usage = []
        self.history = []

    def getHistory(self, force=False):
        """Get the 'imageinfo' entries (upload history) of the file
        force - reload the history even if it was fetched before
        Raises page.NoPage if the file does not exist.
        """
        if self.history and not force:
            return self.history
        if self.pageid == 0 and not self.title:
            self.setPageInfo()
        if not self.exists:
            # NoPage is defined in the page module, not in this one.
            raise page.NoPage
        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'iilimit': self.site.limit,
        }
        if self.pageid > 0:
            params['pageids'] = self.pageid
        else:
            params['titles'] = self.title
        req = api.APIRequest(self.site, params)
        response = req.query()
        # Exactly one page was requested, so take the single entry instead of
        # indexing by str(self.pageid), which fails for title-based queries.
        pages = response['query']['pages']
        self.history = pages[list(pages)[0]]['imageinfo']
        return self.history

    def getUsage(self, titleonly=False, force=False, namespaces=False):
        """Gets a list of pages that use the file
        titleonly - set to True to only create a list of strings,
        else it will be a list of Page objects
        force - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)
        """
        if self.usage and not force:
            users = self.usage
            if namespaces is not False:
                users = [p for p in users if p.namespace in namespaces]
        else:
            users = list(self.__getUsageInternal(namespaces))
            # Only unrestricted results are complete enough to cache.
            if namespaces is False:
                self.usage = users
        if titleonly:
            return [p.title for p in users]
        return users

    def getUsageGen(self, titleonly=False, force=False, namespaces=False):
        """Generator function for pages that use the file
        titleonly - set to True to return strings,
        else it will return Page objects
        force - reload the list even if it was generated before
        namespaces - List of namespaces to restrict to (queries with this option will not be cached)
        """
        if self.usage and not force:
            for p in self.usage:
                if namespaces is False or p.namespace in namespaces:
                    if titleonly:
                        yield p.title
                    else:
                        yield p
            return
        # Collect into a local list and publish it only after a full pass, so
        # an abandoned generator never leaves a truncated cache behind.
        collected = []
        for p in self.__getUsageInternal(namespaces):
            collected.append(p)
            if titleonly:
                yield p.title
            else:
                yield p
        if namespaces is False:
            self.usage = collected

    def __getUsageInternal(self, namespaces=False):
        """Yield an unchecked Page object for every 'imageusage' result
        namespaces - List of namespaces to restrict to, or False for all
        """
        params = {
            'action': 'query',
            'list': 'imageusage',
            'iutitle': self.title,
            'iulimit': self.site.limit,
        }
        if namespaces is not False:
            params['iunamespace'] = '|'.join([str(ns) for ns in namespaces])
        while True:
            req = api.APIRequest(self.site, params)
            # NOTE(review): False presumably turns off automatic continuation
            # in APIRequest.query; continuation is handled by hand below.
            data = req.query(False)
            for item in data['query']['imageusage']:
                yield page.Page(self.site, item['title'], check=False, followRedir=False)
            try:
                params['iucontinue'] = data['query-continue']['imageusage']['iucontinue']
            except KeyError:
                # No continuation marker in the response: last batch.
                break

    def __extractToList(self, json, stuff):
        """Return the 'title' of every item in json['query'][stuff]
        An empty list is returned if that key is absent.
        """
        if stuff not in json['query']:
            return []
        return [item['title'] for item in json['query'][stuff]]

    def _queryImageInfo(self, extra=None):
        """Query prop=imageinfo with iiprop=url and return the first entry
        extra - optional dict of additional API parameters
        """
        if self.pageid == 0:
            self.setPageInfo()
        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'iiprop': 'url',
        }
        if extra:
            params.update(extra)
        if self.pageid != 0:
            params['pageids'] = self.pageid
        elif self.title:
            params['titles'] = self.title
        else:
            self.setPageInfo()
            if not self.exists:  # Non-existent files may be on a shared repo (e.g. commons)
                params['titles'] = self.title
            else:
                params['pageids'] = self.pageid
        req = api.APIRequest(self.site, params)
        res = req.query(False)
        pages = res['query']['pages']
        return pages[list(pages)[0]]['imageinfo'][0]

    def getURL(self):
        """Get the URL of the file, as reported by the imageinfo API"""
        return self._queryImageInfo()['url']

    def download(self, width=False, height=False, location=False, urlQuery=None):
        """Download the image to a local file
        width/height - set width OR height of the downloaded image
        location - set the filename to save to. If not set, the page title
        minus the namespace prefix will be used and saved to the current directory
        urlQuery - If specified, this will be added as ?urlQuery after the download URL
        Returns the filename the data was written to.
        Raises FileDimensionError if both width and height are given.
        """
        # Validate before doing any network work.
        if width and height:
            raise FileDimensionError("Can't specify both width and height")
        extra = {}
        if width:
            extra['iiurlwidth'] = width
        if height:
            extra['iiurlheight'] = height
        info = self._queryImageInfo(extra)
        url = info['url']
        # When a scaled size is requested, the API reports the scaled image
        # under 'thumburl'; 'url' always points at the full-size original.
        if extra and 'thumburl' in info:
            url = info['thumburl']
        if urlQuery is not None:
            url += '?' + urlQuery
        if not location:
            location = self.title.split(':', 1)[1]
        opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.site.cookies))
        headers = {'User-agent': self.site.useragent}
        request = urllib2.Request(url, None, headers)
        data = opener.open(request)
        try:
            f = open(location, 'wb')
            try:
                # Copy in chunks so large files are not held in memory at once.
                while True:
                    chunk = data.read(65536)
                    if not chunk:
                        break
                    f.write(chunk)
            finally:
                f.close()
        finally:
            data.close()
        return location

    def upload(self, fileobj=None, comment='', url=None, ignorewarnings=False, watch=False):
        """Upload a file, requires the "poster" module
        fileobj - A file object opened for reading
        comment - The log comment, used as the initial page content if the file
        doesn't already exist on the wiki
        url - A URL to upload the file from, if allowed on the wiki
        ignorewarnings - Ignore warnings about duplicate files, etc.
        watch - Add the page to your watchlist
        Returns the API response. Raises UploadError on invalid arguments.
        """
        if not api.canupload and fileobj:
            raise UploadError("The poster module is required for file uploading")
        if not fileobj and not url:
            raise UploadError("Must give either a file object or a URL")
        if fileobj and url:
            raise UploadError("Cannot give a file and a URL")
        if fileobj:
            if not isinstance(fileobj, file):
                raise UploadError('If uploading from a file, a file object must be passed')
            # Any mode containing 'r' or '+' is readable ('rb+', 'rU', 'w+', ...).
            if 'r' not in fileobj.mode and '+' not in fileobj.mode:
                raise UploadError('File must be readable')
            fileobj.seek(0)
        params = {
            'action': 'upload',
            'comment': comment,
            'filename': self.unprefixedtitle,
            'token': self.getToken('edit'),  # There's no specific "upload" token
        }
        if url:
            params['url'] = url
        else:
            params['file'] = fileobj
        if ignorewarnings:
            params['ignorewarnings'] = ''
        if watch:
            params['watch'] = ''
        req = api.APIRequest(self.site, params, write=True, multipart=bool(fileobj))
        res = req.query()
        if 'upload' in res and res['upload']['result'] == 'Success':
            # Reset cached page state that the upload has made stale.
            self.wikitext = ''
            self.links = []
            self.templates = []
            self.history = []
            self.exists = True
        return res