9
9
from requests .exceptions import RequestException
10
10
from mwscan import settings
11
11
12
- # For very old installs, eg CentOS: https://github.com/magesec/magesec/issues/60
12
+ # For very old installs, eg CentOS:
13
+ # https://github.com/magesec/magesec/issues/60
13
14
try :
14
15
requests .packages .urllib3 .disable_warnings ()
15
16
except AttributeError :
@@ -36,14 +37,23 @@ def __init__(self, **kwargs):
36
37
37
38
def find_whitelist_in_rawrules(self, rawrules):
    """Extract the whitelist embedded in a comment of the raw rules text.

    Looks for the first ``/* ... WHITELIST = {...} */`` block comment in
    ``rawrules`` and parses the ``{...}`` part as JSON.

    :param rawrules: rules source text to search.
    :returns: a set built from the parsed JSON object (i.e. its keys), or
        an empty set when no such comment is present.
    :raises ValueError: if the matched ``{...}`` text is not valid JSON
        (raised by ``json.loads``).
    """
    # Find whitelist hashes from comments, because yara whitelist
    # hashing is too slow. See
    # https://github.com/VirusTotal/yara/issues/592

    # Raw string: the pattern contains regex escapes (\*, \{, \}, \s) that
    # are not valid *string* escapes and trigger warnings on modern Python.
    # DOTALL lets the JSON object span multiple lines.
    m = re.search(
        r'/\*[^*]*WHITELIST = (\{.*?\})\s*\*/', rawrules, flags=re.DOTALL)
    return set(json.loads(m.group(1)) if m else [])
44
46
45
47
def get_rules(self):
    """Fetch the rules from ``self.rules_url`` and return them.

    The text is obtained via ``self._recursive_fetch(self.rules_url)``.
    On Python 2, a ``unicode`` result is encoded to an ASCII byte string,
    silently dropping any non-ASCII characters. On Python 3 (where the
    ``unicode`` builtin does not exist) the fetched value is returned
    unchanged.

    :returns: the fetched rules text.
    """
    rawrules = self._recursive_fetch(self.rules_url)

    # Only the name lookup can raise NameError; keep the try body minimal
    # so an unrelated NameError further down is never swallowed.
    try:
        py2_text_type = unicode  # noqa: F821 -- exists on Python 2 only
    except NameError:
        return rawrules  # py3

    if isinstance(rawrules, py2_text_type):
        return rawrules.encode('ascii', errors='ignore')

    return rawrules
56
+
47
57
48
58
def get_whitelist (self ):
49
59
if not self .whitelist_url :
@@ -81,7 +91,7 @@ def _get_cache_timestamp_content(self, cachefile):
81
91
return mtime , cachedcontent
82
92
83
93
def _httpget (self , url ):
84
- """ Fetch URL and use if-modified-since header, store in cache,
94
+ """ Fetch URL and use if-modified-since header, store in cache,
85
95
fail if upstream fails """
86
96
87
97
filename = last_url_path (url )
@@ -106,10 +116,15 @@ def _httpget(self, url):
106
116
with open (cachefile , 'wb' ) as fh :
107
117
fh .write (resp .content )
108
118
109
- return resp .content .decode ()
119
+ # py3 vs py2
120
+ if type (resp .content ) is bytes :
121
+ return resp .content .decode ('utf-8' , errors = 'ignore' )
122
+ else :
123
+ return resp .content
110
124
111
125
if resp .status_code == 304 :
112
- logging .debug ('Upstream {0} is the same as our cache (HTTP 304)' .format (url ))
126
+ logging .debug (
127
+ 'Upstream {0} is the same as our cache (HTTP 304)' .format (url ))
113
128
114
129
# Upstream hasn't changed (304) or has err'd
115
130
if cachedcontent is not None :
@@ -151,6 +166,7 @@ def include(match):
151
166
class Files (RulesProvider ):
152
167
153
168
# initialize with Files(args)
169
+
154
170
def get_rules (self ):
155
171
path = self ._args .rules
156
172
logging .info ("Loading {0}" .format (self ._args .rules ))
0 commit comments