Skip to content

Commit dc27081

Browse files
authored
Fix unicode vs utf8 problem with magesec ruleset (#203)
1 parent 5fc173c commit dc27081

File tree

2 files changed

+23
-6
lines changed

2 files changed

+23
-6
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
.vscode/
12
*~
23
.idea/
34
.DS_Store

mwscan/ruleset.py

+22-6
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,8 @@
99
from requests.exceptions import RequestException
1010
from mwscan import settings
1111

12-
# For very old installs, eg CentOS: https://github.com/magesec/magesec/issues/60
12+
# For very old installs, eg CentOS:
13+
# https://github.com/magesec/magesec/issues/60
1314
try:
1415
requests.packages.urllib3.disable_warnings()
1516
except AttributeError:
@@ -36,14 +37,23 @@ def __init__(self, **kwargs):
3637

3738
def find_whitelist_in_rawrules(self, rawrules):
3839
# Find whitelist hashes from comments, because yara whitelist
39-
# hashing is too slow. See https://github.com/VirusTotal/yara/issues/592
40+
# hashing is too slow. See
41+
# https://github.com/VirusTotal/yara/issues/592
4042

4143
m = re.search(
4244
'/\*[^*]*WHITELIST = (\{.*?\})\s*\*/', rawrules, flags=re.DOTALL)
4345
return set(json.loads(m.group(1)) if m else [])
4446

4547
def get_rules(self):
46-
return self._recursive_fetch(self.rules_url)
48+
rawrules = self._recursive_fetch(self.rules_url)
49+
try:
50+
if type(rawrules) is unicode:
51+
return rawrules.encode('ascii', errors='ignore')
52+
except NameError:
53+
pass # py3
54+
55+
return rawrules
56+
4757

4858
def get_whitelist(self):
4959
if not self.whitelist_url:
@@ -81,7 +91,7 @@ def _get_cache_timestamp_content(self, cachefile):
8191
return mtime, cachedcontent
8292

8393
def _httpget(self, url):
84-
""" Fetch URL and use if-modified-since header, store in cache,
94+
""" Fetch URL and use if-modified-since header, store in cache,
8595
fail if upstream fails """
8696

8797
filename = last_url_path(url)
@@ -106,10 +116,15 @@ def _httpget(self, url):
106116
with open(cachefile, 'wb') as fh:
107117
fh.write(resp.content)
108118

109-
return resp.content.decode()
119+
# py3 vs py2
120+
if type(resp.content) is bytes:
121+
return resp.content.decode('utf-8', errors='ignore')
122+
else:
123+
return resp.content
110124

111125
if resp.status_code == 304:
112-
logging.debug('Upstream {0} is the same as our cache (HTTP 304)'.format(url))
126+
logging.debug(
127+
'Upstream {0} is the same as our cache (HTTP 304)'.format(url))
113128

114129
# Upstream hasn't changed (304) or has err'd
115130
if cachedcontent is not None:
@@ -151,6 +166,7 @@ def include(match):
151166
class Files(RulesProvider):
152167

153168
# initialize with Files(args)
169+
154170
def get_rules(self):
155171
path = self._args.rules
156172
logging.info("Loading {0}".format(self._args.rules))

0 commit comments

Comments
 (0)