Merge pull request #13 from inuyasha2012/master

Fix UnicodeDecodeError when opening the stopwords file on Python 3
This commit is contained in:
Hain Wang 2017-11-07 20:42:25 -06:00 committed by GitHub
commit 902fd83808
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -104,7 +104,10 @@ def _load_stopwords(file_path):
load stop words
'''
global _stopwords
words = open(file_path, 'r')
if sys.version_info[0] < 3:
words = open(file_path, 'r')
else:
words = open(file_path, 'r', encoding='utf-8')
stopwords = words.readlines()
for w in stopwords:
_stopwords.add(any2unicode(w).strip())