import gradio as gr
from transformers import AutoTokenizer, AutoModel
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

# Load a pre-trained sentence-embedding model and tokenizer from Hugging Face.
# AutoModel (not AutoModelForSequenceClassification) is the right class here:
# all-MiniLM-L6-v2 is an embedding model with no classification head.
model_name = "sentence-transformers/all-MiniLM-L6-v2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.eval()
# Sample dataset of question-answer pairs
dataset = [
    ("What is the capital of France?", "Paris is the capital of France."),
    ("Who is the creator of Python?", "Guido van Rossum created Python."),
    ("What is the tallest mountain in the world?", "Mount Everest is the tallest mountain in the world."),
]
# Embed a text by mean-pooling the token vectors from the last hidden state.
# (Plain mean pooling over the sequence; fine for single, unpadded inputs.)
def embed(text):
    inputs = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).numpy()  # shape (1, hidden_size)

# Find the most relevant answer: compare the user's question against each
# stored question and return the answer paired with the closest match
def find_most_relevant_answer(question):
    question_embedding = embed(question)
    highest_similarity = -1.0
    most_relevant_answer = ""
    for q, a in dataset:
        candidate_embedding = embed(q)
        # embed() already returns 2-D (1, hidden_size) arrays, so they can be
        # passed to cosine_similarity directly without extra list-wrapping
        similarity = cosine_similarity(question_embedding, candidate_embedding)[0][0]
        if similarity > highest_similarity:
            highest_similarity = similarity
            most_relevant_answer = a
    return most_relevant_answer
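
# Note: find_most_relevant_answer re-embeds every stored question on each call.
# For a larger dataset, one option (a sketch, not part of the original app) is
# to precompute the dataset embeddings once at startup with the embed() helper:
#
#     dataset_embeddings = np.vstack([embed(q) for q, _ in dataset])
#
#     def find_most_relevant_answer(question):
#         similarities = cosine_similarity(embed(question), dataset_embeddings)[0]
#         return dataset[int(np.argmax(similarities))][1]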
# Set up the Gradio interface
def chat_with_bot(question):
    return find_most_relevant_answer(question)

iface = gr.Interface(fn=chat_with_bot, inputs="text", outputs="text", title="Simple QA Chatbot")
iface.launch()
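
# Usage: running this script starts a local Gradio app (by default at
# http://127.0.0.1:7860). Type a question such as "What is the capital of
# France?" into the text box to get the closest stored answer.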