1
1
# coding:utf-8
2
2
3
+ from __future__ import absolute_import
3
4
import logging
4
5
from random import shuffle
5
6
import chardet
10
11
from lxml .cssselect import CSSSelector
11
12
12
13
from talon .constants import RE_DELIMITER
14
+ import six
13
15
14
16
15
17
def safe_format (format_string , * args , ** kwargs ):
@@ -28,7 +30,7 @@ def safe_format(format_string, *args, **kwargs):
28
30
except (UnicodeEncodeError , UnicodeDecodeError ):
29
31
format_string = to_utf8 (format_string )
30
32
args = [to_utf8 (p ) for p in args ]
31
- kwargs = {k : to_utf8 (v ) for k , v in kwargs .iteritems ()}
33
+ kwargs = {k : to_utf8 (v ) for k , v in six .iteritems (kwargs )}
32
34
return format_string .format (* args , ** kwargs )
33
35
34
36
# ignore other errors
@@ -47,7 +49,7 @@ def to_unicode(str_or_unicode, precise=False):
47
49
"""
48
50
encoding = quick_detect_encoding (str_or_unicode ) if precise else 'utf-8'
49
51
if isinstance (str_or_unicode , str ):
50
- return unicode (str_or_unicode , encoding , 'replace' )
52
+ return six . text_type (str_or_unicode , encoding , 'replace' )
51
53
return str_or_unicode
52
54
53
55
@@ -61,7 +63,7 @@ def detect_encoding(string):
61
63
detected = chardet .detect (string )
62
64
if detected :
63
65
return detected .get ('encoding' ) or 'utf-8'
64
- except Exception , e :
66
+ except Exception as e :
65
67
pass
66
68
return 'utf-8'
67
69
@@ -76,7 +78,7 @@ def quick_detect_encoding(string):
76
78
detected = cchardet .detect (string )
77
79
if detected :
78
80
return detected .get ('encoding' ) or detect_encoding (string )
79
- except Exception , e :
81
+ except Exception as e :
80
82
pass
81
83
return detect_encoding (string )
82
84
@@ -87,7 +89,7 @@ def to_utf8(str_or_unicode):
87
89
>>> utils.to_utf8(u'hi')
88
90
'hi'
89
91
"""
90
- if isinstance (str_or_unicode , unicode ):
92
+ if isinstance (str_or_unicode , six . text_type ):
91
93
return str_or_unicode .encode ("utf-8" , "ignore" )
92
94
return str (str_or_unicode )
93
95
@@ -173,7 +175,7 @@ def _rm_excessive_newlines(s):
173
175
def _encode_utf8(s):
    """Return *s* encoded as UTF-8 when it is a text (unicode) string.

    Any value that is not an instance of ``six.text_type`` is handed back
    unchanged.
    """
    # Guard clause: only text strings need encoding.
    if not isinstance(s, six.text_type):
        return s
    return s.encode('utf-8')
177
179
178
180
179
181
_UTF8_DECLARATION = ('<meta http-equiv="Content-Type" content="text/html;'
0 commit comments