Skip to content

Commit

Permalink
限制频率
Browse files Browse the repository at this point in the history
  • Loading branch information
tangrela committed Feb 9, 2018
1 parent fbc2636 commit 25e6d61
Show file tree
Hide file tree
Showing 4 changed files with 82 additions and 22 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ __pycache__/
*$py.class

parser.py
parser_.py
data.sqlite
config.py
# C extensions
Expand Down
49 changes: 49 additions & 0 deletions app/decorator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# coding:utf-8
from functools import wraps
from flask import abort, redirect, url_for, g, request, jsonify
from functools import update_wrapper
import time
from flask_login import current_user
from . import rd
from datetime import datetime


class RateLimit(object):
    """Fixed-window request counter stored in Redis.

    Constructing an instance counts one hit: it increments the Redis key
    for the current window and (re)sets that key's expiry.

    Attributes:
        reset: Unix timestamp at which the current window ends.
        key: Redis key for the window (``key_prefix`` + ``reset``).
        limit: Maximum number of hits allowed per window.
        per: Window length in seconds.
        send_x_headers: Stored as given; not used inside this class.
        hits: Raw number of hits recorded in this window, including the
            one made by this instance.
        current: ``hits`` capped at ``limit`` so ``remaining`` never goes
            negative.
    """

    # Extra seconds the Redis key is kept after the window ends
    # (presumably slack for clock differences -- TODO confirm).
    expiration_window = 10

    def __init__(self, key_prefix, limit, per, send_x_headers):
        # End of the current window: the next multiple of ``per`` seconds.
        self.reset = (int(time.time()) // per) * per + per
        self.key = key_prefix + str(self.reset)
        self.limit = limit
        self.per = per
        self.send_x_headers = send_x_headers
        pipe = rd.pipeline()
        pipe.incr(self.key)
        pipe.expireat(self.key, self.reset + self.expiration_window)
        # INCR returns the counter value *after* counting this request.
        self.hits = pipe.execute()[0]
        self.current = min(self.hits, limit)

    # Hits still available in this window (0 once the limit is reached).
    remaining = property(lambda x: x.limit - x.current)
    # The counter already includes the current request, so the limit is
    # exceeded only when the raw count goes past ``limit``. Comparing with
    # ``>=`` would reject the ``limit``-th request (and with ``limit=1``
    # would reject every request).
    over_limit = property(lambda x: x.hits > x.limit)


def get_view_rate_limit():
    """Return the object stored on ``g._view_rate_limit``, or ``None``.

    ``None`` is returned when the attribute has not been set on ``g``.
    """
    try:
        return g._view_rate_limit
    except AttributeError:
        return None


def on_over_limit(limit):
    """Build the default response for a request that is over its limit.

    Args:
        limit: The value passed by the caller; it is not inspected here.

    Returns:
        A ``(response, status)`` tuple: a JSON body with ``status`` set to
        ``'fail'`` and a Chinese message (roughly "rate limited"), plus
        HTTP status 200.
    """
    payload = dict(message='限制频率', status='fail')
    response = jsonify(payload)
    return response, 200


def ratelimit(limit, per=300, send_x_headers=True,
              over_limit=on_over_limit,
              scope_func=lambda: request.remote_addr,
              key_func=lambda: request.endpoint):
    """Decorator factory that rate-limits a view function.

    Args:
        limit: Hit limit passed to ``RateLimit``.
        per: Window length in seconds passed to ``RateLimit``.
        send_x_headers: Passed through to ``RateLimit`` unchanged.
        over_limit: Callable invoked with the ``RateLimit`` instance when
            the limit is exceeded; its return value becomes the view's
            result. Pass ``None`` to never short-circuit the view.
        scope_func: Returns the per-client part of the key (defaults to
            the remote address of the request).
        key_func: Returns the per-view part of the key (defaults to the
            request endpoint).

    Returns:
        A decorator that wraps a view with the rate-limit check.
    """
    def decorator(f):
        @wraps(f)
        def rate_limited(*args, **kwargs):
            bucket = 'rate-limit/%s/%s/' % (key_func(), scope_func())
            state = RateLimit(bucket, limit, per, send_x_headers)
            # Store the state on ``g`` so it can be read back later in
            # the same request.
            g._view_rate_limit = state
            blocked = over_limit is not None and state.over_limit
            if not blocked:
                return f(*args, **kwargs)
            return over_limit(state)
        return rate_limited
    return decorator
Binary file modified app/parser.pyc
Binary file not shown.
54 changes: 32 additions & 22 deletions app/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,10 @@
from app import db
from app.models import Context
import parser
from . import logger,rd
from . import logger, rd
from config import *
from captcha import *
from decorator import *

basedir = os.path.abspath('.')
clawer = os.path.join(basedir, 'tumblr_v2.py')
Expand All @@ -35,8 +36,11 @@
'<meta property="og:image" content="(.*?)" /><meta property="og:image:height"')
vhead = 'https://vt.tumblr.com/tumblr_%s.mp4'
HOME = 'http://%s.tumblr.com/api/read?&num=50'
headers={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"}
ban= ['TencentCloud','Savvis','ALICLOUD','GOOGLE-CLOUD']
headers = {
'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36"}
ban = ['TencentCloud', 'Savvis', 'ALICLOUD', 'GOOGLE-CLOUD', 'WANG-SUKEJI']
bad_ua=['FeedDemon ','BOT/0.1 (BOT for JCE)','CrawlDaddy ','Java','Feedly','UniversalFeedParser','ApacheBench','Swiftbot','ZmEu','Indy Library','oBot','jaunty','YandexBot','AhrefsBot','MJ12bot','WinHttp','EasouSpider','HttpClient','Microsoft URL Control','YYSpider','jaunty','Python-urllib','lightDeckReports Bot','PHP','Python','Go']


def check(uid):
url = HOME % uid
Expand Down Expand Up @@ -69,24 +73,25 @@ def getmd5():
return a.hexdigest()



def getipwhois(ip):
    """Return the WHOIS network name for ``ip``, using Redis as a cache.

    A cached value other than ``'home'`` is returned directly. Otherwise
    the chinaz ipwhois page is fetched and the name is extracted with one
    of two regex patterns, then stored in Redis under ``ip``.

    Args:
        ip: IP address string; used both as the Redis key and in the
            lookup URL.

    Returns:
        The network name with spaces removed, or ``'home'`` when the
        lookup fails for any reason (request error, or no pattern match).
    """
    # A single GET replaces the former EXISTS + GET + GET sequence.
    cached = rd.get(ip)
    if cached is not None and cached != 'home':
        print('{} exists in redis,netname {}'.format(ip, cached))
        return cached

    print('{} exists doesn\' exists in redis'.format(ip))
    url = 'http://tool.chinaz.com/ipwhois?q={}'.format(ip)
    try:
        r = requests.get(url, headers=headers, timeout=8)
        # Try the primary pattern first, then the alternative page layout.
        matches = (re.findall('netname:(.*?)<br/>', r.content) or
                   re.findall('<p>Name : (.*?)</p>', r.content))
        # An empty match list raises IndexError, handled below like any
        # other lookup failure.
        netname = matches[0].replace(' ', '')
        rd.set(ip, netname)
    except Exception as e:
        # Best-effort lookup: report the error and fall back to 'home'.
        print(e)
        netname = 'home'
    return netname


Expand All @@ -109,15 +114,15 @@ def before_request():
except:
ip = request.remote_addr
print ip
netname=getipwhois(ip)

netname = getipwhois(ip)


def log(string):
    """Write an info-level log line for the current request.

    The line combines the module-level ``ip``, ``netname`` and ``ua``
    values with the given action text.

    Args:
        string: Description of the action being logged.
    """
    template = 'ip:{ip},netname:{netname},UA:{ua},action:{string}'
    message = template.format(ip=ip, netname=netname, ua=ua, string=string)
    logger.info(message)


@app.route('/')
Expand All @@ -129,12 +134,16 @@ def index():


@app.route('/api', methods=['POST'])
@ratelimit(limit=5, per=10)
def api():
global ua
global ip
url = request.form.get('url')
hash_ = request.form.get('hash')
captcha_code = request.form.get('captcha_code')
if ip in ['111.231.237.241','111.230.109.198','91.121.83.61'] or sum([i.lower() in netname.lower() for i in ban])>0:
retdata={}
if ip in ['111.231.237.241', '111.230.109.198', '91.121.83.61'] or sum([i.lower() in netname.lower() for i in ban]) > 0 or sum([i.lower() in ua.lower() for i in bad_ua]) > 0:
log('bad user')
retdata = {}
retdata['status'] = 'fail'
retdata['message'] = '机器人滚!如果不是机器人,请不要通过代理访问本站!'
return jsonify(retdata)
Expand All @@ -144,7 +153,7 @@ def api():
print 'session code is :', session.get('CAPTCHA')
if captcha_code.upper() == session.get('CAPTCHA'):
return jsonify({'captcha': 'pass'})
if hash_ != session.get('hash') or hash_ is None or request.headers['User-Agent'] is None or 'python' in request.headers['User-Agent'].lower():
if hash_ != session.get('hash') or hash_ is None:
log('may be a crawler!!! url {}'.format(url))
return jsonify({'captcha': 'ok'})
else:
Expand Down Expand Up @@ -279,12 +288,13 @@ def api():
def download():
id = request.args.get('id')
type = request.args.get('type')
log('download from {} {}'.format(id,type))
log('download from {} {}'.format(id, type))
if type == 'video':
isvideo = 1
else:
isvideo = 0
query_result = Context.query.filter_by(uid=id, isvideo=isvideo).order_by(Context.posttime.desc()).all()
query_result = Context.query.filter_by(
uid=id, isvideo=isvideo).order_by(Context.posttime.desc()).all()
if len(query_result) <> 0:
content = ''
for line in query_result:
Expand Down

0 comments on commit 25e6d61

Please sign in to comment.