-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathenvironment_CoreNLP.py
94 lines (72 loc) · 2.63 KB
/
environment_CoreNLP.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import random
import string
import data
from corpus_utils import tokenize_sentence, LanguageIndex
from itertools import takewhile
import random
from difflib import SequenceMatcher
from pycorenlp import StanfordCoreNLP
import random
import math
import pdb
# Number of history entries after which a conversation episode counts as done
# (checked in Environment.step(), though the resulting flag is noted as unused).
CONVO_LEN = 1
# Utterance length bounds; apparently unused in this file's active code
# (MAX_UTTERANCE_LEN appears only in a commented-out truncation line) — TODO confirm callers.
MIN_UTTERANCE_LEN = 4
MAX_UTTERANCE_LEN = 20
from data import BEGIN_TAG, END_TAG, EMPTY_TOKEN, UNK_TOKEN
def char_tokenizer(s: str):
    """Split *s* into a list of its individual characters."""
    return [ch for ch in s]
class Environment:
    """Dialogue environment whose reward is the CoreNLP sentiment of the bot's utterance.

    Requires a Stanford CoreNLP server reachable at http://localhost:9000.
    """

    @property
    def lang(self):
        # LanguageIndex built over the loaded questions + answers.
        return self._lang

    def __init__(self):
        self.reset()
        # TO DO: this should be a new test list of questions, assuming pretrained with conv()
        self._questions, self._answers = data.load_conv_text()
        self._lang = LanguageIndex(
            self._questions + self._answers,
            empty_token=EMPTY_TOKEN,
            unknown_token=UNK_TOKEN,
        )
        # Client for the local CoreNLP sentiment service.
        self.stanford = StanfordCoreNLP('http://localhost:9000')

    def step(self, action):
        """Score *action*, append it and a sampled question to history.

        Returns a (next_state, reward, done) triple; ``done`` is computed
        from the history length but, as noted below, currently unused by callers.
        """
        reward = self.calc_reward(action)
        done = len(self.history) >= CONVO_LEN  # Present, but NOT USED
        self.history.append(action)
        # <------- TO DO: decide if randomly sampled!
        sampled = random.sample(self._questions, 1)[0]
        sampled = ''.join(sampled)
        tagged = f'{BEGIN_TAG} {sampled} {END_TAG}'
        self.history.append(tagged)
        return tagged, reward, done

    def reset(self):
        """Start a fresh conversation by clearing the history."""
        self.history = []

    def calc_reward(self, utterance: str):
        """Return tanh of the summed per-sentence sentiment scores of *utterance*."""
        annotation = self.stanford.annotate(
            utterance,
            properties={
                'annotators': 'sentiment',
                'outputFormat': 'json',
                'timeout': '5000'
            })
        # CoreNLP sentimentValue: negative=1, neutral=2, positive=3;
        # shifting by -2 maps each sentence onto {-1, 0, +1}.
        shifted = [int(sent['sentimentValue']) - 2 for sent in annotation['sentences']]
        return math.tanh(sum(shifted))
# Quick smoke test: needs a CoreNLP server on localhost:9000.
if __name__ == "__main__":
    environment = Environment()
    finished = False
    utterance = "I love you!"
    current_state = ""
    previous_state = current_state
    current_state, score, finished = environment.step(utterance)
    print(f"env: {previous_state} -> bot: {utterance} reward: {score}")
    utterance = current_state