import os

import gradio as gr
import pandas as pd
from langchain_chroma import Chroma
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings
# Read the tourism dataset from JSON
df = pd.read_json("./tourisme_chatbot.json")

# Randomly sample a portion of the dataset (here 20%); adjust frac as needed
sampled_df = df.sample(frac=0.2, random_state=42)
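# Optional sanity check (commented out): the indexing loop below assumes the
# fields worth embedding live in the first four columns; inspect them first.
# print(sampled_df.columns[:4].tolist(), len(sampled_df))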
# Build one context string per sampled row from the first four columns,
# concatenated as "column_name: value" pairs
context_data = []
for i in range(len(sampled_df)):
    context = ""
    for j in range(4):
        context += sampled_df.columns[j]
        context += ": "
        context += str(sampled_df.iloc[i, j])  # Cast to string for concatenation
        context += " "
    context_data.append(context)
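# An equivalent, more idiomatic sketch of the loop above (assumption: only the
# first four columns matter); it produces the same "name: value" strings,
# modulo the trailing space:
# cols = sampled_df.columns[:4]
# context_data = [
#     " ".join(f"{c}: {row[c]}" for c in cols)
#     for _, row in sampled_df.iterrows()
# ]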
# Get the secret key from the environment
groq_api_key = os.environ.get('groq_api_keys')
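# Minimal guard (assumes the key is stored as a Space secret named
# 'groq_api_keys'): fail fast with a clear message rather than hitting an
# opaque authentication error on the first request.
if groq_api_key is None:
    raise RuntimeError("Environment variable 'groq_api_keys' is not set")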
# Initialize the LLM (Groq)
llm = ChatGroq(model="llama-3.1-70b-versatile", api_key=groq_api_key)
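# Optional smoke test (left commented out because it makes a billable API
# call); ChatGroq.invoke returns an AIMessage whose text lives in .content:
# print(llm.invoke("Say hello in one short sentence").content)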
# Initialize the embedding model (Hugging Face)
embed_model = HuggingFaceEmbeddings(model_name="mixedbread-ai/mxbai-embed-large-v1")
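# Optional check (commented out): embed_query returns a plain list of floats;
# mxbai-embed-large-v1 should produce 1024-dimensional vectors.
# print(len(embed_model.embed_query("Marrakech medina")))  # expected: 1024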
# Create the vector store
vectorstore = Chroma(
    collection_name="tourism_dataset_store",
    embedding_function=embed_model,
    persist_directory="./",
)

# Add the sampled context strings to the vector store
vectorstore.add_texts(context_data)
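# Optional retrieval sanity check (commented out) against the indexed texts;
# the query string is illustrative:
# for doc in vectorstore.similarity_search("beaches in Morocco", k=2):
#     print(doc.page_content[:120])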
# Set up the retriever
retriever = vectorstore.as_retriever()
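# as_retriever() returns the 4 most similar documents per query by default;
# the count can be tuned via search_kwargs, e.g.:
# retriever = vectorstore.as_retriever(search_kwargs={"k": 3})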
# Define the prompt template
template = """You are a Moroccan tourism expert.
Use the provided context to answer the question.
If you don't know the answer, say so. Explain your answer in detail.
Do not discuss the context in your response; just provide the answer directly.
Context: {context}
Question: {question}
Answer:"""

rag_prompt = PromptTemplate.from_template(template)
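# The filled prompt can be previewed by formatting the template manually
# (the placeholder values here are illustrative):
# print(rag_prompt.format(context="<retrieved documents>", question="<user question>"))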
# Set up the RAG chain
rag_chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)
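# One-shot (non-streaming) usage sketch (commented out): the dict step sends
# the raw question both to the retriever (filling {context}) and straight
# through RunnablePassthrough into {question}:
# answer = rag_chain.invoke("What is the best time to visit Marrakech?")
# print(answer)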
# Function for real-time streaming of results
def rag_memory_stream(text):
    partial_text = ""
    for new_text in rag_chain.stream(text):
        partial_text += new_text
        yield partial_text
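# Console usage sketch (commented out): each yielded value is the full answer
# accumulated so far, which is what Gradio expects from a generator function
# for live text updates.
# for partial in rag_memory_stream("Top attractions in Fes?"):
#     print(partial)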
# Gradio interface setup
examples = [
    'Tourist attraction sites in Morocco',
    'What are some fun activities to do in Morocco?',
]
title = "Real-time AI App with Groq API and LangChain to Answer Morocco Tourism Questions"

demo = gr.Interface(
    title=title,
    fn=rag_memory_stream,
    inputs="text",
    outputs="text",
    examples=examples,
    allow_flagging="never",
)
if __name__ == '__main__':
    demo.launch(share=True)