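"""Evaluate CelebBot answers against reference answers.

For each celebrity in the list below, the script scrapes a Britannica
biography as the knowledge source, answers the questions from data.json with
CelebBot, writes the generated answers to predictions.txt, and scores them
with BLEU, METEOR, ROUGE-L, and BERTScore.
"""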
import itertools

import evaluate
import spacy
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModel

from utils import get_celeb_data, get_article
from celebbot import CelebBot

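# Generator model (FLAN-T5) and sentence-embedding model (MPNet), both handed to CelebBot.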
QA_MODEL_ID = "google/flan-t5-large"
SENTTR_MODEL_ID = "sentence-transformers/all-mpnet-base-v2"
celeb_names = ["Cate Blanchett", "David Beckham", "Emma Watson", "Lady Gaga", "Madonna", "Mark Zuckerberg"]

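# Reference answers, flattened in the same order the questions are asked below,
# so they line up one-to-one with the generated predictions.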
celeb_data = get_celeb_data("data.json")
references = [celeb_data[name]["answers"] for name in celeb_names]
references = list(itertools.chain.from_iterable(references))
predictions = []

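# Load tokenizers and models once, on CPU; change `device` to "cuda" to run on a GPU.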
device = 'cpu'
QA_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_ID)
QA_model = AutoModelForSeq2SeqLM.from_pretrained(QA_MODEL_ID).to(device)
sentTr_tokenizer = AutoTokenizer.from_pretrained(SENTTR_MODEL_ID)
sentTr_model = AutoModel.from_pretrained(SENTTR_MODEL_ID).to(device)

# Load the spaCy pipeline once here rather than once per celebrity inside the loop;
# it is used for sentence-splitting the biographies and is also passed to CelebBot.
spacy_model = spacy.load("en_core_web_lg")

for celeb_name in celeb_names:
    gender = celeb_data[celeb_name]["gender"]
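    # Britannica URL slugs: most follow the "First-Last" pattern, but a few
    # biographies need a disambiguated slug.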
    if celeb_name == "Madonna":
        name = "Madonna-American-singer-and-actress"
    elif celeb_name == "Anne Hathaway":
        name = "Anne-Hathaway-American-actress"
    else:
        name = "-".join(celeb_name.split(" "))
    knowledge = get_article(f"https://www.britannica.com/biography/{name}")

    # Sentence-split the biography; these sentences form CelebBot's knowledge base.
    knowledge_sents = [sent.text.strip() for sent in spacy_model(knowledge).sents]

    ai = CelebBot(celeb_name, gender, QA_tokenizer, QA_model, sentTr_tokenizer, sentTr_model, spacy_model, knowledge_sents)
    for q in celeb_data[celeb_name]["questions"]:
        ai.text = q
        response = ai.question_answer()
        print("response:", response)
        predictions.append(response)

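# Persist the raw predictions for later inspection.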
with open("predictions.txt", "w") as f:
    for prediction in predictions:
        f.write(prediction + "\n")

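# Score the generated answers against the references with standard text-generation metrics.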
bleu = evaluate.load("bleu")
results = bleu.compute(predictions=predictions, references=references, max_order=4)
print(f"BLEU: {round(results['bleu'], 2)}")

meteor = evaluate.load("meteor")
results = meteor.compute(predictions=predictions, references=references)
print(f"METEOR: {round(results['meteor'], 2)}")

rouge = evaluate.load("rouge")
results = rouge.compute(predictions=predictions, references=references)
print(f"ROUGE-L: {round(results['rougeL'], 2)}")

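# BERTScore returns per-example F1 scores (rescaled against the English baseline); report their mean.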
bertscore = evaluate.load("bertscore")
results = bertscore.compute(predictions=predictions, references=references, rescale_with_baseline=True, lang="en")
print(f"BERTScore F1: {round(sum(results['f1']) / len(results['f1']), 2)}")