Skip to content

Commit

Permalink
Updates to matthew PC
Browse files Browse the repository at this point in the history
1/7/2022
  • Loading branch information
zjlee8888 committed Jul 1, 2022
1 parent f3397c5 commit f7b5a9f
Show file tree
Hide file tree
Showing 5 changed files with 108 additions and 108 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@

model/phase1_74Q.data-00000-of-00001
model/phase1_74Q.index
__pycache__/chartbot_config.cpython-39.pyc
Binary file modified __pycache__/api.cpython-39.pyc
Binary file not shown.
68 changes: 31 additions & 37 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,58 +4,52 @@
import numpy as np
import json

from transformers import AutoTokenizer
import numpy as np
from transformers import TFAutoModelForSequenceClassification
from tensorflow.keras.losses import BinaryCrossentropy
from tensorflow.keras.optimizers import Adam
import pandas as pd
from chartbot_config import *


app = Flask(__name__)
api = Api(app)

# Request parser retained for compatibility; the resource itself reads
# request.json directly rather than going through reqparse.
parser = reqparse.RequestParser()
parser.add_argument('data')

# Training data: one row per (Domain, Sub domain, Intent, Answer Format)
# with the question variants spread across extra columns.
df = pd.read_csv(raw_data_path, encoding='unicode_escape')

num_labels = df.Intent.nunique()
# Flatten the question-variant columns into long form; the generated
# "variable" column is of no further use.
df = df.melt(id_vars=["Domain", "Sub domain", "Intent", "Answer Format"]).drop("variable", axis=1)
# Assign each unique answer format a class index, then invert the mapping
# so a predicted index can be turned back into its answer format.
# NOTE(review): the index range is taken from Intent.nunique() — presumably
# equal to the number of unique answer formats; verify against the data.
Classes_dict_1 = dict(zip(df["Answer Format"].unique(), range(df["Intent"].nunique())))
Classes_dict = {idx: fmt for fmt, idx in Classes_dict_1.items()}

# Build the tokenizer/classifier pair from the configured checkpoint and
# restore the fine-tuned weights.
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = TFAutoModelForSequenceClassification.from_pretrained(
    checkpoint,
    num_labels=num_labels,
    problem_type="multi_label_classification",
)
model.load_weights(model_path)

opt = Adam()
loss = BinaryCrossentropy(from_logits=True)

# Compilation is only needed so the model can be called for inference with
# the same settings it was trained under.
model.compile(
    optimizer=opt,
    loss=loss,
    metrics=["accuracy"],
)

# Define how the api will respond to the post requests
class QuestionClassifier(Resource):
    """Classify an incoming question and return its answer-format label."""

    def post(self):
        # FIX: the original declared `def post():` (no self) and decorated it
        # with @app.route('/foo'), so flask-restful's bound-method dispatch on
        # /chartbot raised TypeError. Routing is handled solely by
        # api.add_resource below; the stray /foo route is removed.
        data = request.json
        sentence = data['data']
        tokenized_dataset = tokenizer(sentence, padding=True, truncation=True, return_tensors="tf")
        output = model(**tokenized_dataset)["logits"]
        # Threshold logits at 0; accept the prediction only when exactly one
        # logit is positive, otherwise use len(i) as an out-of-range key so
        # the lookup below falls back to the "cannot understand" reply.
        class_preds = [np.argmax(i) if np.sum(i) == 1 else len(i) for i in output > 0][0]
        # FIX: dropped the unreachable second `return jsonify(data=...)` that
        # followed this return in the original.
        return jsonify(Classes_dict.get(class_preds, "Sorry I cannot understand."))

api.add_resource(QuestionClassifier, '/chartbot')

if __name__ == '__main__':
    # NOTE(review): the original re-imported every library and re-loaded the
    # tokenizer, CSV data, label dictionaries, and model here, duplicating the
    # module-level setup above line for line (including the no-op
    # `checkpoint = checkpoint`). That duplicate work is removed; the objects
    # built at import time are used directly.
    app.run(debug=True)
2 changes: 2 additions & 0 deletions model/checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
model_checkpoint_path: "phase1_74Q"
all_model_checkpoint_paths: "phase1_74Q"
Loading

0 comments on commit f7b5a9f

Please sign in to comment.