"""Gradio question-answering app for blood-donation questions.

The user's question is embedded with a Sentence Transformer model and
compared against the questions stored in a CSV file. When the best match is
similar enough, its stored answer is passed through a causal language model
for refinement; otherwise an out-of-domain message is returned.
"""

import os

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from transformers import AutoTokenizer, AutoModelForCausalLM

# Hugging Face model identifier used for answer refinement.
MODEL_ID = "meta-llama/Meta-Llama-3-8B-Instruct"

# Reply returned when no stored question is similar enough to the user's.
OUT_OF_DOMAIN_MESSAGE = (
    "The question appears to be out of domain. "
    "Kindly ask questions related to blood donations."
)

# Load the pre-trained Sentence Transformer model.
model_sentence_transformer = SentenceTransformer('LaBSE')

# Load questions and answers from the CSV file.
df = pd.read_csv('combined_questions_and_answers.csv')

# Encode all questions in the dataset once, at start-up.
question_embeddings = model_sentence_transformer.encode(df['Question'].tolist())

# Hugging Face access token, read from the environment.
HF_TOKEN = os.environ.get("HUGGINGFACE_API_KEY", None)
if not HF_TOKEN:
    raise ValueError(
        "Hugging Face API key not found in environment variables. "
        "Please set the HUGGINGFACE_API_KEY environment variable."
    )

# Load the tokenizer and model. The token is passed explicitly so the
# download does not depend on a separately cached login.
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, token=HF_TOKEN)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID, device_map="auto", token=HF_TOKEN
)


def refine_text(prompt):
    """Generate a continuation of *prompt* with the language model.

    Args:
        prompt: Text fed to the model.

    Returns:
        The newly generated text only (at most 50 new tokens), with special
        tokens removed and surrounding whitespace stripped.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(**inputs, max_new_tokens=50)
    # The generated sequence begins with the prompt tokens; skip them so the
    # prompt is not echoed back as part of the answer.
    prompt_length = inputs["input_ids"].shape[-1]
    generated_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()


def get_answer(user_question, threshold=0.30):
    """Find the stored question most similar to *user_question*.

    Args:
        user_question: Question text entered by the user.
        threshold: Similarity the best match must exceed for its answer to
            be used.

    Returns:
        A ``(answer, similarity)`` pair. ``answer`` is the refined stored
        answer when the best similarity exceeds ``threshold``, otherwise the
        out-of-domain message. ``similarity`` is the best cosine similarity,
        or ``0.0`` when the question is empty.
    """
    # A blank question cannot match anything meaningful; skip the models.
    if not user_question or not user_question.strip():
        return OUT_OF_DOMAIN_MESSAGE, 0.0

    # Encode the user question and compare it with every stored question.
    user_embedding = model_sentence_transformer.encode(user_question)
    similarities = cosine_similarity([user_embedding], question_embeddings)

    max_similarity = np.max(similarities)
    if max_similarity <= threshold:
        return OUT_OF_DOMAIN_MESSAGE, max_similarity

    # `similarities` has a single row, so the flattened argmax is the index
    # of the best-matching question.
    similar_question_idx = np.argmax(similarities)
    answer = df.iloc[similar_question_idx]['Answer']

    refined_answer = refine_text(f"Refine this answer: {answer}")
    return refined_answer, max_similarity


def gradio_app(user_question):
    """Gradio callback: answer *user_question* and format the reply text."""
    answer, similarity = get_answer(user_question)
    return f"Similarity: {similarity}\nAnswer: {answer}"


# Build and launch the Gradio app.
iface = gr.Interface(
    fn=gradio_app,
    inputs=gr.Textbox(label="Enter your question"),
    outputs=gr.Textbox(label="Answer"),
    title="Blood Donation Q&A",
    description="Ask questions related to blood donation and get answers.",
)

iface.launch()