From 9b1b716deec444af74adc9600d69a6e05255ec55 Mon Sep 17 00:00:00 2001 From: Hyebin Lee <84822464+eeHeaven@users.noreply.github.com> Date: Tue, 11 Jan 2022 16:58:07 +0900 Subject: [PATCH] =?UTF-8?q?utf=20-8=20=EA=B4=80=EB=A0=A8=20=EB=82=B4?= =?UTF-8?q?=EC=9A=A9=20=EC=B6=94=EA=B0=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/prepro/data_builder.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/prepro/data_builder.py b/src/prepro/data_builder.py index 31b80af7..7edc33ca 100644 --- a/src/prepro/data_builder.py +++ b/src/prepro/data_builder.py @@ -22,7 +22,7 @@ def load_json(p, lower): source = [] tgt = [] flag = False - for sent in json.load(open(p))['sentences']: + for sent in json.load(open(p, encoding ='utf-8-sig'))['sentences']: tokens = [t['word'] for t in sent['tokens']] if (lower): tokens = [t.lower() for t in tokens] @@ -137,7 +137,7 @@ def _rouge_clean(s): def hashhex(s): """Returns a heximal formated SHA1 hash of the input string.""" h = hashlib.sha1() - h.update(s.encode('utf-8')) + h.update(s.encode('utf-8-sig')) return h.hexdigest()