add compatibility code for py2

Hai Liang Wang 2017-10-18 09:49:09 +08:00
parent b92ba06837
commit 2316042eea
4 changed files with 26 additions and 8 deletions

Requirements.txt

@@ -1 +1 @@
-synonyms>=1.0
+synonyms>=1.1

demo.py

@@ -35,6 +35,7 @@ if sys.version_info[0] < 3:
 import synonyms # https://github.com/huyingxi/Synonyms
 import numpy
 import unittest
+import thulac
 # run testcase: python /Users/hain/ai/Synonyms/demo.py Test.testExample
 class Test(unittest.TestCase):
@@ -70,7 +71,7 @@ class Test(unittest.TestCase):
             tags.append(_[1])
         for (k, v) in enumerate(tags):
             if v.startswith("n") or v.startswith("v"): # drop stopwords, punctuation, adverbs, adjectives, pronouns, etc.
-                print("%s: %s" % (words[k], synonyms.nearby(words[k])))
+                synonyms.display(words[k]) # synonyms.display calls synonyms.nearby
 
 def test():
     unittest.main()
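The change swaps a raw print of nearby()'s return value for the library's own formatted output. The same noun/verb filtering loop as a standalone script might look like this (a sketch; it assumes synonyms and thulac are installed, and the sample sentence is illustrative, not from the test suite):

import synonyms
import thulac

_tagger = thulac.thulac()  # default mode: segmentation + POS tagging
words, tags = [], []
for word, tag in _tagger.cut("识别技术的发展"):  # hypothetical input sentence
    words.append(word)
    tags.append(tag)

for k, v in enumerate(tags):
    if v.startswith("n") or v.startswith("v"):  # keep nouns and verbs only
        synonyms.display(words[k])  # formatted list of nearby words and scores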

setup.py

@@ -12,7 +12,7 @@ Welcome
 """
 setup(name='synonyms',
-      version='1.0',
+      version='1.1',
       description='Chinese Synonyms for Natural Language Processing and Understanding',
       long_description=LONGDOC,
       author='Hai Liang Wang, Hu Ying Xi',

synonyms/__init__.py

@@ -27,19 +27,29 @@ import sys
 curdir = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(curdir)
 
+PLT = 2
+if sys.version_info[0] < 3:
+    reload(sys)
+    sys.setdefaultencoding("utf-8")
+    # raise "Must be using Python 3"
+else:
+    PLT = 3
 
 import gzip
 import thulac # http://thulac.thunlp.org/
 from collections import defaultdict
 
-wn_raw_data = gzip.open(os.path.join(curdir, 'data', 'words.nearby.gz'), 'rt', encoding='utf-8', errors="ignore")
 _vocab = defaultdict(lambda: [[], []])
 _size = 0
 _thulac = thulac.thulac() # default mode (segmentation + POS tagging)
+_fin = []
+_fin_path = os.path.join(curdir, 'data', 'words.nearby.gz')
+if PLT == 2:
+    import io
+    _fin = io.TextIOWrapper(io.BufferedReader(gzip.open(_fin_path)), encoding='utf8', errors='ignore')
+else:
+    _fin = gzip.open(_fin_path, 'rt', encoding='utf-8', errors='ignore')
 
 def add_word_to_vocab(word, nearby, nearby_score):
     '''
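The heart of the compatibility change is opening the gzipped vocabulary as a text stream on both interpreters: Python 2's gzip.open() accepts no encoding argument, so its byte stream is wrapped in io.TextIOWrapper, while Python 3 supports 'rt' mode directly. The same branch, extracted into a reusable helper (a sketch; gzip_text_open is a hypothetical name, not part of the library):

import gzip
import io
import sys

def gzip_text_open(path, encoding="utf-8", errors="ignore"):
    # Python 2: gzip.open() has no encoding parameter, so wrap the
    # raw byte stream to decode it on the fly.
    if sys.version_info[0] < 3:
        return io.TextIOWrapper(io.BufferedReader(gzip.open(path)),
                                encoding=encoding, errors=errors)
    # Python 3: open directly in text mode with an explicit encoding.
    return gzip.open(path, "rt", encoding=encoding, errors=errors)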
@@ -47,6 +57,9 @@ def add_word_to_vocab(word, nearby, nearby_score):
     '''
     global _size
     if not word is None:
+        if PLT == 2:
+            word = word.encode("utf-8")
+            nearby = [z.encode("utf-8") for z in nearby]
         _vocab[word] = [nearby, nearby_score]
         _size += 1
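Under Python 2 the wrapped stream yields unicode objects, so the added branch encodes words back to UTF-8 byte strings before they become _vocab keys, keeping lookups consistent with plain str input. The same pattern in isolation (a sketch; to_native_str is a hypothetical helper, not library API):

import sys

def to_native_str(s):
    # Python 2: encode unicode to a UTF-8 byte string so dict keys
    # match native str lookups; on Python 3, str is already text.
    if sys.version_info[0] < 3:
        return s.encode("utf-8")
    return s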
@@ -57,7 +70,7 @@ def _build_vocab():
     c = None # current word
     w = []   # nearby words
     s = []   # scores of the nearby words
-    for v in wn_raw_data.readlines():
+    for v in _fin.readlines():
         v = v.strip()
         if v is None or len(v) == 0: continue
         if v.startswith("query:"):
@@ -126,10 +139,14 @@ def compare(s1, s2):
     w2, t2 = _segment_words(s2)
     return max(_similarity(w1, t1, w2, t2), _similarity(w2, t2, w1, t1))
 
+def display(word):
+    print("'%s'近义词:" % word)  # header line: "'<word>' synonyms:"
+    o = nearby(word)
+    for k, v in enumerate(o[0]):
+        print("  %d. %s: %s" % (k + 1, v, o[1][k]))
 
 def main():
-    print("人脸", nearby("人脸"))
-    print("识别", nearby("识别"))
-    print("OOV", nearby("NOT_EXIST"))
+    display("人脸")
 
 if __name__ == '__main__':
     main()
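With the new entry point, running the module prints one formatted synonym list instead of three raw tuples. A usage sketch (assuming synonyms>=1.1 is installed; nearby() still returns the parallel word/score lists for programmatic use):

import synonyms

synonyms.display("人脸")                 # prints a numbered "k. word: score" line per neighbor
words, scores = synonyms.nearby("人脸")  # raw parallel lists, the same data display() formats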