Skip to content

Commit

Permalink
update web_develop
Browse files Browse the repository at this point in the history
  • Loading branch information
dongweiming committed Jun 26, 2016
1 parent b4823f8 commit 992c90a
Show file tree
Hide file tree
Showing 13 changed files with 16 additions and 20 deletions.
Binary file added chapter11/section1/data/a-2016-06-30_00000.bz2
Binary file not shown.
Binary file added chapter11/section1/data/b-2016-06-30_00000.bz2
Binary file not shown.
Binary file added chapter11/section1/data/c-2016-06-30_00000.bz2
Binary file not shown.
Binary file added chapter11/section1/data/d-2016-06-30_00000.bz2
Binary file not shown.
2 changes: 1 addition & 1 deletion chapter11/section1/map_reduce.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def reducer_count(item):

if __name__ == '__main__':
input_files = glob.glob(
- '/data/log/scribe/movie/*/*/*/*-2015-07-30_00000.bz2')
+ '/home/ubuntu/web_develop/chapter11/section1/data/*.bz2')
mapper = MapReduce(mapper_match, reducer_match)
cookie_feq = mapper(input_files)
print 'Result: {}'.format(cookie_feq)
Expand Down
12 changes: 0 additions & 12 deletions chapter11/section2/slave.conf

This file was deleted.

6 changes: 4 additions & 2 deletions chapter11/section2/tola.py → chapter11/section2/tyrion.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ def _filter_func(self):
filter_func_list = self.match_rules.values()

def wrapper(log, *args, **kwargs):
- return any(func(log, *args, **kwargs) for func in filter_func_list)
+ return any(func(log, *args, **kwargs)
+ for func in filter_func_list)

return wrapper

Expand All @@ -61,7 +62,8 @@ def get_rdd(self):
dpark = DparkContext()

return dpark.union(
- [dpark.textFile(path, splitSize=64 << 20) for path in self.paths]
+ [dpark.textFile(path, splitSize=64 << 20)
+ for path in self.paths]
).map(Weblog.from_line)

def get_flat_mapped_rdd(self):
Expand Down
4 changes: 2 additions & 2 deletions chapter11/section4/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def _get_frame(date_string):
def show_tables(date_string=None):
df = _get_frame(date_string)
if isinstance(df, bool) and not df:
- return 'A bad date format!'
+ return 'Bad date format!'
return render_template(
'chapter11/section4/csv.html', df=df.to_html(classes='frame'),
date_string=date_string)
Expand All @@ -40,7 +40,7 @@ def serve_csv(date_string=None, user_index=None):
buffer = cStringIO.StringIO()
df = _get_frame(date_string)
if isinstance(df, bool) and not df:
- return 'A bad date format!'
+ return 'Bad date format!'
if user_index is not None:
df = df.loc[user_index - 1] # 事实上返回的是一个Series
df.to_csv(buffer, encoding='utf-8')
Expand Down
5 changes: 3 additions & 2 deletions chapter13/section1/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@
'http://www.xicidaili.com',
'http://www.kuaidaili.com/free',
'http://www.proxylists.net/?HTTP',
- 'http://www.youdaili.net/Daili/http/4402.html',
- 'http://www.youdaili.net/Daili/http/4403.html',
+ # www.youdaili.net的地址随着日期不断更新
+ 'http://www.youdaili.net/Daili/http/4565.html',
+ 'http://www.youdaili.net/Daili/http/4562.html',
'http://www.kuaidaili.com',
'http://proxy.mimvp.com',
]
Expand Down
4 changes: 4 additions & 0 deletions chapter13/section1/proxy_fetcher_with_queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,3 +97,7 @@ def use_thread_with_queue():
queue.put(url)

queue.join()


+ if __name__ == '__main__':
+ use_thread_with_queue2()
2 changes: 1 addition & 1 deletion chapter6/section4/user_active.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def record_active(account_id, t=None):
key = ACCOUNT_ACTIVE_KEY
for arg in ('year', 'month', 'day'):
key = '{}:{}'.format(key, getattr(t, arg))
- r.setbit(key, account_id, 1)
+ p.setbit(key, account_id, 1)
p.execute()


Expand Down
Binary file added data/dump.zip
Binary file not shown.
1 change: 1 addition & 0 deletions data/fake_useragent.json

Large diffs are not rendered by default.

0 comments on commit 992c90a

Please sign in to comment.