"""Gradio question-answering assistant over pre-computed page embeddings.

At import time the script loads ``ucdavis_health_embeddings.csv`` (columns
used: ``embedding``, ``text``, ``url``), then serves a Gradio interface that:

1. embeds the user's question with the OpenAI embeddings endpoint,
2. ranks every stored page by dot product against the question embedding,
3. asks a chat model to answer using the text of the best-matching pages,
4. shows the answer, the source links and a bar chart of similarity scores.
"""
import ast
import os

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import openai
import pandas as pd
import tiktoken
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Get API keys from environment variables
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
openai.api_key = OPENAI_API_KEY
client = openai

# Number of best-matching pages used as context and shown as links.
TOP_K = 4
CHAT_MODEL = "gpt-3.5-turbo"
CHART_PATH = 'bar_chart.png'

# text-embedding-3-small is tokenized with cl100k_base, so the truncation
# budget in get_embedding() must be counted with that encoding.
tokenizer = tiktoken.get_encoding('cl100k_base')


def get_embedding(text, model='text-embedding-3-small', max_tokens=7000):
    """Return the embedding vector for *text*.

    Args:
        text: The string to embed.
        model: Name of the OpenAI embedding model to call.
        max_tokens: Upper bound on the number of tokens sent; longer input is
            truncated to its first ``max_tokens`` tokens.

    Returns:
        The ``embedding`` field of the first item in the API response.
    """
    # disallowed_special=() makes special-token strings such as
    # "<|endoftext|>" in user input encode as plain text instead of raising.
    tokens = tokenizer.encode(text, disallowed_special=())
    if len(tokens) > max_tokens:
        text = tokenizer.decode(tokens[:max_tokens])
    response = client.embeddings.create(input=[text], model=model)
    return response.data[0].embedding


data = pd.read_csv("ucdavis_health_embeddings.csv")


def safe_literal_eval(x):
    """Parse *x* as a Python literal, returning ``[]`` when it cannot be parsed.

    Non-string cells (e.g. NaN from a blank CSV field) make
    ``ast.literal_eval`` raise ``ValueError`` and therefore also yield ``[]``.
    """
    try:
        return ast.literal_eval(x)
    except (ValueError, SyntaxError):
        return []


def _to_float_list(value):
    """Return *value* as a list of floats, or ``[]`` if it is not a list."""
    if not isinstance(value, list):
        return []
    return [float(item) for item in value]


# Convert the 'embedding' column from strings to lists of floats and drop the
# rows whose embedding is missing or unparseable.
data['embedding'] = data['embedding'].apply(safe_literal_eval).apply(_to_float_list)
data = data[data['embedding'].apply(lambda x: len(x) > 0)]


def query(question):
    """Answer *question* using the most similar stored pages as context.

    Args:
        question: The user's question as plain text.

    Returns:
        A 4-tuple ``(answer, links, similarity_scores, link_list)``:
        the chat model's reply, the selected URLs joined with ``"\\n \\n"``,
        the selected pages' scores (highest first) and the URLs as a list in
        the same order as the scores.
    """
    question_embedding = get_embedding(question)

    # Dot product of every stored page embedding with the question embedding.
    # NOTE(review): presumably the vectors are unit-normalized so this equals
    # cosine similarity -- confirm for the model used to build the CSV.
    similarity = data['embedding'].apply(
        lambda page_embedding: np.dot(page_embedding, question_embedding)
    )

    # Sort once and reuse the result for scores, context and links.
    top_scores = similarity.sort_values(ascending=False).head(TOP_K)
    top_rows = data.loc[top_scores.index]

    context = " ".join(top_rows['text'])
    link_list = top_rows['url'].tolist()
    links = "\n \n".join(link_list)

    # The retrieved context is an instruction to the model, so it is sent as a
    # system message ahead of the user's question. Sending it with the
    # "assistant" role after the question would present it as the model's own
    # previous reply.
    chat_completion = client.chat.completions.create(
        messages=[
            {
                "role": "system",
                "content": (
                    "You are a helpful assistant tasked to respond to users of "
                    "UC Davis Health who are seeking information about their services"
                ),
            },
            {
                "role": "system",
                "content": (
                    "Use this information from the UC Davis Health website and "
                    f"answer the user's question: {context}. "
                    "Please stick to this context while answering the question. "
                    "Include all important information relevant to what the user "
                    "is seeking, also tell them things they should be mindful of "
                    "while following instructions. "
                    "Don't miss any details about timings or weekdays."
                ),
            },
            {"role": "user", "content": question},
        ],
        model=CHAT_MODEL,
    )

    answer = chat_completion.choices[0].message.content
    return answer, links, top_scores.tolist(), link_list


def plot_bar_chart(similarity_scores, links_series):
    """Render a horizontal bar chart of similarity scores and save it as PNG.

    Scores are sorted ascending and labelled "Link N" ... "Link 1", so the
    highest score is "Link 1" and is drawn as the top bar.

    Args:
        similarity_scores: Iterable of numeric scores.
        links_series: Iterable of links paired element-wise with the scores.

    Returns:
        The path of the written image (``'bar_chart.png'``).
    """
    ranked = sorted(zip(similarity_scores, links_series))
    sorted_scores = [score for score, _ in ranked]
    # Create labels as "Link 1", "Link 2", etc., counting down so that the
    # largest score (last after the ascending sort) becomes "Link 1".
    link_labels = [f"Link {rank}" for rank in range(len(ranked), 0, -1)]

    # Use an explicit Figure/Axes and always close it: figures created through
    # pyplot stay registered until closed, so one would leak per request.
    # NOTE(review): every call writes the same file name; confirm that requests
    # are served one at a time or switch to a per-request file.
    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        bars = ax.barh(link_labels, sorted_scores, color='skyblue', edgecolor='black')
        ax.set_xlabel('Similarity Score')
        ax.set_ylabel('Links')
        ax.set_title('Similarity Scores Bar Chart for the above links in the same order')
        ax.set_xlim(0, 1)  # Set x-axis scale from 0 to 1
        ax.grid(True, axis='x')

        # Add a numeric label just to the right of each bar.
        for bar, score in zip(bars, sorted_scores):
            ax.text(
                bar.get_width() + 0.01,
                bar.get_y() + bar.get_height() / 2,
                f'{score:.2f}',
                va='center',
                ha='left',
            )

        fig.tight_layout()
        fig.savefig(CHART_PATH)
    finally:
        plt.close(fig)
    return CHART_PATH


# Define the Gradio interface
def gradio_query(question):
    """Gradio callback: return the answer, the links text and the chart path.

    A blank question is answered locally with a short prompt instead of being
    sent to the embeddings and chat endpoints.
    """
    if not question or not question.strip():
        return "Please enter a question.", "", None

    answer, links, similarity_scores, link_list = query(question)
    bar_plot_path = plot_bar_chart(similarity_scores, link_list)
    return answer, links, bar_plot_path


interface = gr.Interface(
    fn=gradio_query,
    inputs=gr.Textbox(lines=2, placeholder="Enter your question here..."),
    outputs=[
        gr.Textbox(label="Answer"),
        gr.Textbox(label="For more information, visit these links"),
        gr.Image(type="filepath", label="Similarity Scores Bar Chart", elem_id="bar_chart"),
    ],
    title="UC Davis Health Query Assistant",
    description="Ask your questions about UC Davis Health services and get relevant information from their website.",
    css=".gradio-container #bar_chart img {width: 200%; height: auto;}",
)

# Launch the interface
interface.launch(share=True)