Spaces:
Runtime error
Runtime error
File size: 4,802 Bytes
6bc94ac 2a846a9 6bc94ac 7fba753 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac 2a846a9 6bc94ac b190683 b55470f 2a846a9 6bc94ac |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import itertools
import re
import spacy
import json
import evaluate
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel
import torch
from utils import *
from celebbot import CelebBot
# Hugging Face model identifiers: the first is loaded as a seq2seq model and
# the second as a plain encoder in evaluate_system().
QA_MODEL_ID = "google/flan-t5-xl"
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"

# Keys of data.json that the evaluation iterates over.
celeb_names = [
    "Cate Blanchett",
    "David Beckham",
    "Emma Watson",
    "Lady Gaga",
    "Madonna",
    "Mark Zuckerberg",
]
def _possessive_regex(name):
    """Compile a pattern that matches the possessive form of *name*.

    Both the straight (') and the typographic (U+2019) apostrophe are
    accepted.  Names ending in "s" match "Names'" as well as "Names's";
    every other name matches only "Name's".
    """
    apostrophes = "'\u2019"
    escaped = re.escape(name)
    if name.endswith("s"):
        # A bare trailing apostrophe is normally followed by whitespace or
        # punctuation, where "\b" cannot match, hence the negative lookahead.
        return re.compile(rf"\b{escaped}[{apostrophes}]s?(?!\w)")
    return re.compile(rf"\b{escaped}[{apostrophes}]s\b")


def _to_first_person(knowledge, name, gender):
    """Rewrite third-person *knowledge* about *name* into first person.

    Pronouns are replaced according to *gender* ("M" or "F"; any other value
    leaves pronouns untouched), then the full name and the last name are
    replaced by "I" and their possessive forms by "my".

    NOTE(review): he_regex, his_regex, she_regex and her_regex are not defined
    in this file; presumably they come from `from utils import *` -- confirm.
    """
    last_name = name.split(" ")[-1]

    if gender == "M":
        knowledge = re.sub(he_regex, "I", knowledge)
        knowledge = re.sub(his_regex, "my", knowledge)
    elif gender == "F":
        knowledge = re.sub(she_regex, "I", knowledge)
        knowledge = re.sub(her_regex, "my", knowledge)

    # Possessives must go first; otherwise "Beckham's" would become "I's".
    # The full name precedes the last name so the longer match wins.
    knowledge = _possessive_regex(name).sub("my", knowledge)
    knowledge = _possessive_regex(last_name).sub("my", knowledge)
    knowledge = re.sub(rf"\b{re.escape(name)}\b", "I", knowledge)
    knowledge = re.sub(rf"\b{re.escape(last_name)}\b", "I", knowledge)
    return knowledge


def evaluate_system():
    """Generate an answer for every question in data.json and score them.

    For each name in ``celeb_names`` the celebrity's knowledge text is turned
    into first person, split into sentences with spaCy and handed to a
    ``CelebBot``.  One prompt per question is sent to the QA model, the
    decoded answers are written to ``predictions.txt`` (one per line), and
    BLEU, METEOR, ROUGE-L and the mean BERTScore F1 against the reference
    answers are printed.

    Raises:
        KeyError: if a name in ``celeb_names`` is missing from data.json or
            its entry lacks one of the keys read here.
    """
    device = 'cpu'
    with open("data.json", encoding='utf-8') as json_file:
        celeb_data = json.load(json_file)

    # Walk celeb_names (not the JSON key order) so that references line up
    # one-to-one with the predictions produced by the loop below.
    references = list(itertools.chain.from_iterable(
        celeb_data[name]['answers'] for name in celeb_names
    ))
    predictions = []

    QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
    QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
    sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
    sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)
    # Loading the spaCy pipeline does not depend on the celebrity; do it once.
    spacy_model = spacy.load("en_core_web_sm")

    safety_instruction = (
        "Always answer as helpfully as possible, while being safe. "
        "Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. "
        "Please ensure that your responses are socially unbiased and positive in nature. "
        "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. "
        "If you don't know the answer to a question, please don't share false information."
    )

    for name in celeb_names:
        entry = celeb_data[name]
        knowledge = _to_first_person(entry["knowledge"], name, entry["gender"])
        knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

        ai = CelebBot(name, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)

        # NOTE(review): ai.text is never assigned in this function, so this
        # branch depends entirely on what CelebBot's constructor stores
        # there -- confirm that this is the intended input to test.
        addressed_to_bot = re.compile(rf'\b(you|your|{re.escape(ai.name)})\b', flags=re.IGNORECASE)
        if addressed_to_bot.search(ai.text) is not None:
            instruction1 = f"You are a celebrity named {ai.name}. {safety_instruction}"
            knowledge = ai.retrieve_knowledge_assertions()
        else:
            instruction1 = safety_instruction

        # Generate one answer per question.  Tokenizing the stringified list
        # of prompts would yield a single sequence, and therefore a single
        # prediction, for the whole celebrity.
        for question in entry["questions"]:
            query = f"Context: {instruction1} {knowledge}\n\nQuestion: {question}\n\nAnswer:"
            input_ids = ai.QA_tokenizer(query, return_tensors="pt").input_ids.to(device)
            outputs = ai.QA_model.generate(input_ids, max_length=1024)
            predictions += ai.QA_tokenizer.batch_decode(outputs, skip_special_tokens=True)

    # The context manager closes the file even if a write fails; the explicit
    # encoding keeps non-ASCII model output portable across platforms.
    with open('predictions.txt', 'w', encoding='utf-8') as file:
        for prediction in predictions:
            file.write(prediction + "\n")

    bleu = evaluate.load("bleu")
    results = bleu.compute(predictions=predictions, references=references, max_order=4)
    print(f"BLEU: {round(results['bleu'], 2)}")

    meteor = evaluate.load("meteor")
    results = meteor.compute(predictions=predictions, references=references)
    print(f"METEOR: {round(results['meteor'], 2)}")

    rouge = evaluate.load("rouge")
    results = rouge.compute(predictions=predictions, references=references)
    print(f"ROUGE: {round(results['rougeL'], 2)}")

    bertscore = evaluate.load("bertscore")
    results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
    print(f"F1: {round(sum(results['f1'])/len(results['f1']), 2)}")
# Run the evaluation only when this file is executed as a script, not when it
# is imported.
if __name__ == "__main__":
    evaluate_system()
|