1
- # -*- coding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
'''
3
3
Sitemap
4
4
-------
8
8
9
9
from __future__ import unicode_literals
10
10
11
+ import re
11
12
import collections
12
13
import os .path
13
14
14
15
from datetime import datetime
15
16
from logging import warning , info
16
17
from codecs import open
18
+ from pytz import timezone
17
19
18
20
from pelican import signals , contents
19
21
from pelican .utils import get_date
46
48
47
49
def format_date (date ):
48
50
if date .tzinfo :
49
- tz = date .strftime ('%s ' )
51
+ tz = date .strftime ('%z ' )
50
52
tz = tz [:- 2 ] + ':' + tz [- 2 :]
51
53
else :
52
54
tz = "-00:00"
@@ -61,6 +63,11 @@ def __init__(self, context, settings, path, theme, output_path, *null):
61
63
self .now = datetime .now ()
62
64
self .siteurl = settings .get ('SITEURL' )
63
65
66
+
67
+ self .default_timezone = settings .get ('TIMEZONE' , 'UTC' )
68
+ self .timezone = getattr (self , 'timezone' , self .default_timezone )
69
+ self .timezone = timezone (self .timezone )
70
+
64
71
self .format = 'xml'
65
72
66
73
self .changefreqs = {
@@ -75,6 +82,8 @@ def __init__(self, context, settings, path, theme, output_path, *null):
75
82
'pages' : 0.5
76
83
}
77
84
85
+ self .sitemapExclude = []
86
+
78
87
config = settings .get ('SITEMAP' , {})
79
88
80
89
if not isinstance (config , dict ):
@@ -83,6 +92,7 @@ def __init__(self, context, settings, path, theme, output_path, *null):
83
92
fmt = config .get ('format' )
84
93
pris = config .get ('priorities' )
85
94
chfreqs = config .get ('changefreqs' )
95
+ self .sitemapExclude = config .get ('exclude' , [])
86
96
87
97
if fmt not in ('xml' , 'txt' ):
88
98
warning ("sitemap plugin: SITEMAP['format'] must be `txt' or `xml'" )
@@ -128,15 +138,19 @@ def write_url(self, page, fd):
128
138
if getattr (page , 'status' , 'published' ) != 'published' :
129
139
return
130
140
131
- page_path = os .path .join (self .output_path , page .url )
141
+ # We can disable categories/authors/etc by using False instead of ''
142
+ if not page .save_as :
143
+ return
144
+
145
+ page_path = os .path .join (self .output_path , page .save_as )
132
146
if not os .path .exists (page_path ):
133
147
return
134
148
135
149
lastdate = getattr (page , 'date' , self .now )
136
150
try :
137
151
lastdate = self .get_date_modified (page , lastdate )
138
152
except ValueError :
139
- warning ("sitemap plugin: " + page .url + " has invalid modification date," )
153
+ warning ("sitemap plugin: " + page .save_as + " has invalid modification date," )
140
154
warning ("sitemap plugin: using date value as lastmod." )
141
155
lastmod = format_date (lastdate )
142
156
@@ -150,25 +164,35 @@ def write_url(self, page, fd):
150
164
pri = self .priorities ['indexes' ]
151
165
chfreq = self .changefreqs ['indexes' ]
152
166
153
-
167
+ pageurl = '' if page .url == 'index.html' else page .url
168
+
169
+ #Exclude URLs from the sitemap:
154
170
if self .format == 'xml' :
155
- fd .write (XML_URL .format (self .siteurl , page .url , lastmod , chfreq , pri ))
171
+ flag = False
172
+ for regstr in self .sitemapExclude :
173
+ if re .match (regstr , pageurl ):
174
+ flag = True
175
+ break
176
+ if not flag :
177
+ fd .write (XML_URL .format (self .siteurl , pageurl , lastmod , chfreq , pri ))
156
178
else :
157
- fd .write (self .siteurl + '/' + loc + '\n ' )
179
+ fd .write (self .siteurl + '/' + pageurl + '\n ' )
158
180
159
def get_date_modified(self, page, default):
    '''Return the modification date of ``page``, or ``default`` if it has none.

    :param page: any object; only its optional ``modified`` attribute is read.
    :param default: value returned unchanged when ``page`` has no
        ``modified`` attribute.
    :returns: ``page.modified`` itself when it is already a ``datetime``;
        otherwise the result of passing it to ``get_date`` (imported from
        ``pelican.utils``).
    :raises ValueError: presumably propagated from ``get_date`` when the
        value cannot be interpreted as a date -- the callers in this file
        wrap this call in ``except ValueError``.
    '''
    if not hasattr(page, 'modified'):
        return default

    modified = page.modified
    # An existing datetime needs no conversion; only other values
    # are handed to get_date.
    if isinstance(modified, datetime):
        return modified
    return get_date(modified)
164
188
165
189
def set_url_wrappers_modification_date (self , wrappers ):
166
190
for (wrapper , articles ) in wrappers :
167
- lastmod = datetime .min
191
+ lastmod = datetime .min . replace ( tzinfo = self . timezone )
168
192
for article in articles :
169
- lastmod = max (lastmod , article .date )
193
+ lastmod = max (lastmod , article .date . replace ( tzinfo = self . timezone ) )
170
194
try :
171
- modified = self .get_date_modified (article , datetime .min );
195
+ modified = self .get_date_modified (article , datetime .min ). replace ( tzinfo = self . timezone )
172
196
lastmod = max (lastmod , modified )
173
197
except ValueError :
174
198
# Suppressed: user will be notified.
@@ -186,7 +210,7 @@ def generate_output(self, writer):
186
210
self .set_url_wrappers_modification_date (self .context ['categories' ])
187
211
self .set_url_wrappers_modification_date (self .context ['tags' ])
188
212
self .set_url_wrappers_modification_date (self .context ['authors' ])
189
-
213
+
190
214
for article in self .context ['articles' ]:
191
215
pages += article .translations
192
216
@@ -202,15 +226,17 @@ def generate_output(self, writer):
202
226
FakePage = collections .namedtuple ('FakePage' ,
203
227
['status' ,
204
228
'date' ,
205
- 'url' ])
229
+ 'url' ,
230
+ 'save_as' ])
206
231
207
232
for standard_page_url in ['index.html' ,
208
233
'archives.html' ,
209
234
'tags.html' ,
210
235
'categories.html' ]:
211
236
fake = FakePage (status = 'published' ,
212
237
date = self .now ,
213
- url = standard_page_url )
238
+ url = standard_page_url ,
239
+ save_as = standard_page_url )
214
240
self .write_url (fake , fd )
215
241
216
242
for page in pages :
0 commit comments