Spaces:
Runtime error
Runtime error
lhzstar
committed on
Commit
·
2a846a9
1
Parent(s):
b55470f
new commits
Browse files
- celebbot.py +9 -18
- run_eval.py +19 -22
- test.py +0 -5
celebbot.py
CHANGED
@@ -50,10 +50,6 @@ class CelebBot():
|
|
50 |
self.text = ""
|
51 |
print(f"me --> No audio recognized")
|
52 |
|
53 |
-
|
54 |
-
def wake_up(self, text):
|
55 |
-
return True if "hey " + self.name in text.lower() else False
|
56 |
-
|
57 |
def text_to_speech(self, autoplay=True):
|
58 |
import run_tts
|
59 |
return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
|
@@ -98,21 +94,16 @@ class CelebBot():
|
|
98 |
|
99 |
def question_answer(self, instruction1='', knowledge=''):
|
100 |
if self.text != "":
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
else:
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
112 |
-
query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
|
113 |
-
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
|
114 |
-
outputs = self.QA_model.generate(input_ids, max_length=1024)
|
115 |
-
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
116 |
|
117 |
# instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
|
118 |
# query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"
|
|
|
50 |
self.text = ""
|
51 |
print(f"me --> No audio recognized")
|
52 |
|
|
|
|
|
|
|
|
|
53 |
def text_to_speech(self, autoplay=True):
|
54 |
import run_tts
|
55 |
return run_tts.tts(self.text, "_".join(self.name.split(" ")), self.spacy_model, autoplay)
|
|
|
94 |
|
95 |
def question_answer(self, instruction1='', knowledge=''):
|
96 |
if self.text != "":
|
97 |
+
if re.search(re.compile(rf'\b(you|your|{self.name})\b', flags=re.IGNORECASE), self.text) != None:
|
98 |
+
instruction1 = f"You are a celebrity named {self.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
99 |
+
|
100 |
+
knowledge = self.retrieve_knowledge_assertions()
|
101 |
else:
|
102 |
+
instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
103 |
+
query = f"Context: {instruction1} {knowledge}\n\nQuestion: {self.text}\n\nAnswer:"
|
104 |
+
input_ids = self.QA_tokenizer(f"{query}", return_tensors="pt").input_ids
|
105 |
+
outputs = self.QA_model.generate(input_ids, max_length=1024)
|
106 |
+
self.text = self.QA_tokenizer.decode(outputs[0], skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
107 |
|
108 |
# instruction2 = f'[Instruction] You are a celebrity named {self.name}. You need to answer the question based on knowledge'
|
109 |
# query = f"{instruction2} [knowledge] {self.text} {answer} [question] {self.name}, {self.text}"
|
run_eval.py
CHANGED
@@ -4,27 +4,30 @@ import spacy
|
|
4 |
import json
|
5 |
import evaluate
|
6 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
|
|
|
7 |
|
8 |
from utils import *
|
9 |
from celebbot import CelebBot
|
10 |
|
11 |
-
DEBUG=True
|
12 |
QA_MODEL_ID = "google/flan-t5-xl"
|
13 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
|
|
14 |
|
15 |
def evaluate_system():
|
16 |
-
|
|
|
|
|
17 |
celeb_data = json.load(json_file)
|
18 |
-
references = [val['answers'] for val in list(celeb_data.
|
19 |
references = list(itertools.chain.from_iterable(references))
|
20 |
predictions = []
|
21 |
|
22 |
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
|
23 |
-
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID)
|
24 |
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
|
25 |
-
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID)
|
26 |
|
27 |
-
for name in
|
28 |
gender = celeb_data[name]["gender"]
|
29 |
knowledge = celeb_data[name]["knowledge"]
|
30 |
|
@@ -50,22 +53,16 @@ def evaluate_system():
|
|
50 |
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
51 |
|
52 |
ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
|
53 |
-
|
54 |
-
|
55 |
-
ai.speech_to_text()
|
56 |
-
else:
|
57 |
-
# ai.text = input("Your question: ")
|
58 |
-
pass
|
59 |
-
|
60 |
-
ai.text = q
|
61 |
-
if ai.text != "":
|
62 |
-
print("me --> ", ai.text)
|
63 |
-
|
64 |
-
predictions.append(ai.question_answer())
|
65 |
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
69 |
|
70 |
file = open('predictions.txt','w')
|
71 |
for prediction in predictions:
|
@@ -86,7 +83,7 @@ def evaluate_system():
|
|
86 |
|
87 |
bertscore = evaluate.load("bertscore")
|
88 |
results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
|
89 |
-
print(f"F1: {round(sum(results['f1'])/len(results['f1']))}")
|
90 |
|
91 |
if __name__ == "__main__":
|
92 |
evaluate_system()
|
|
|
4 |
import json
|
5 |
import evaluate
|
6 |
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
|
7 |
+
import torch
|
8 |
|
9 |
from utils import *
|
10 |
from celebbot import CelebBot
|
11 |
|
|
|
12 |
QA_MODEL_ID = "google/flan-t5-xl"
|
13 |
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
|
14 |
+
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]
|
15 |
|
16 |
def evaluate_system():
|
17 |
+
|
18 |
+
device = 'cpu'
|
19 |
+
with open("data.json", encoding='utf-8') as json_file:
|
20 |
celeb_data = json.load(json_file)
|
21 |
+
references = [val['answers'] for key, val in list(celeb_data.items()) if key in celeb_names]
|
22 |
references = list(itertools.chain.from_iterable(references))
|
23 |
predictions = []
|
24 |
|
25 |
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
|
26 |
+
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
|
27 |
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
|
28 |
+
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
|
29 |
|
30 |
+
for name in celeb_names:
|
31 |
gender = celeb_data[name]["gender"]
|
32 |
knowledge = celeb_data[name]["knowledge"]
|
33 |
|
|
|
53 |
knowledge_sents = [i.text.strip() for i in spacy_model(knowledge).sents]
|
54 |
|
55 |
ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
|
56 |
+
if re.search(re.compile(rf'\b(you|your|{ai.name})\b', flags=re.IGNORECASE), ai.text) != None:
|
57 |
+
instruction1 = f"You are a celebrity named {ai.name}. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
+
knowledge = ai.retrieve_knowledge_assertions()
|
60 |
+
else:
|
61 |
+
instruction1 = f"Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature. If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don't know the answer to a question, please don't share false information."
|
62 |
+
queries = [f"Context: {instruction1} {knowledge}\n\nQuestion: {q}\n\nAnswer:" for q in celeb_data[name]["questions"]]
|
63 |
+
input_ids = ai.QA_tokenizer(f"{queries}", return_tensors="pt").input_ids.to(device)
|
64 |
+
outputs = ai.QA_model.generate(input_ids, max_length=1024)
|
65 |
+
predictions+= ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
66 |
|
67 |
file = open('predictions.txt','w')
|
68 |
for prediction in predictions:
|
|
|
83 |
|
84 |
bertscore = evaluate.load("bertscore")
|
85 |
results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
|
86 |
+
print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")
|
87 |
|
88 |
if __name__ == "__main__":
|
89 |
evaluate_system()
|
test.py
DELETED
@@ -1,5 +0,0 @@
|
|
1 |
-
import evaluate
|
2 |
-
|
3 |
-
bertscore = evaluate.load("bertscore")
|
4 |
-
results = bertscore.compute(predictions=["I am from Toronto."], references=["Hey"],rescale_with_baseline=True, lang="en")
|
5 |
-
print(results)
|
|
|
|
|
|
|
|
|
|
|
|