Create str_utils.py

This commit is contained in:
missQian 2020-10-04 21:31:36 +08:00 committed by GitHub
parent d64c6f591f
commit eba476ed56
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -0,0 +1,38 @@
import re
def normalize_sent(sent):
sent = str(sent).replace('``', '"')
sent = str(sent).replace("''", '"')
sent = str(sent).replace('-LRB-', '(')
sent = str(sent).replace('-RRB-', ')')
sent = str(sent).replace('-LSB-', '(')
sent = str(sent).replace('-RSB-', ')')
return sent
def collapse_role_type(role_type):
'''
collapse role types from 36 to 28 following Bishan Yang 2016
we also have to handle types like 'Beneficiary#Recipient'
:param role_type:
:return:
'''
if role_type.startswith('Time-'):
return 'Time'
idx = role_type.find('#')
if idx != -1:
role_type = role_type[:idx]
return role_type
def normalize_tok(tok, lower=False, normalize_digits=False):
if lower:
tok = tok.lower()
if normalize_digits:
tok = re.sub(r"\d", "0", tok)
tok = re.sub(r"^(\d+[,])*(\d+)$", "0", tok)
return tok
def capitalize_first_char(sent):
sent = str(sent[0]).upper() + sent[1:]
return sent