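"""Gradio chatbot that answers questions over a news DataFrame using
retrieval-augmented generation: BAAI/bge-large-en-v1.5 embeddings, a local
FAISS index, and GPT-4 via LangChain."""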
import gradio as gr
import os
import openai
import pandas as pd
import numpy as np  # needed to shape the query embedding for the FAISS search
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import FAISS
from langchain.prompts import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from langchain.chains import LLMChain
from langchain.schema.output_parser import StrOutputParser
from langchain.chat_models import ChatOpenAI
# Set up the Hugging Face model and embeddings
model_name = "BAAI/bge-large-en-v1.5"
model_kwargs = {'device': 'cuda'}
encode_kwargs = {'normalize_embeddings': True}
embedding_function = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
# Set the OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")
# Load the FAISS index using LangChain's FAISS implementation;
# allow_dangerous_deserialization is required to unpickle a locally built index
db = FAISS.load_local("Faiss", embedding_function, allow_dangerous_deserialization=True)
parser = StrOutputParser()  # instantiated but not used in the chain below
# Load your data (a news DataFrame) plus the chunk-level metadata used by
# search() below; 'metadata_info.pkl' is an assumed filename -- adjust it
# to wherever your chunk metadata actually lives
df = pd.read_pickle('df_news.pkl')
metadata_info = pd.read_pickle('metadata_info.pkl')
# Search function to retrieve relevant documents
def search(query):
    # Embed the query and search the raw FAISS index directly, since we need
    # the integer row ids to map hits back onto the DataFrame
    query_embedding = np.array(embedding_function.embed_query(query), dtype='float32').reshape(1, -1)
    D, I = db.index.search(query_embedding, 10)
    results = []
    for idx in I[0]:
        if idx == -1:  # FAISS pads with -1 when fewer than k hits exist
            continue
        if idx < 3327:  # rows below this offset are whole documents; adjust this based on your indexing
            doc_index = idx
            results.append({
                'type': 'metadata',
                'title': df.iloc[doc_index]['title'],
                'author': df.iloc[doc_index]['author'],
                'full_text': df.iloc[doc_index]['full_text'],
                'source': df.iloc[doc_index]['url']
            })
        else:  # rows at or above the offset are text chunks
            chunk_index = idx - 3327
            metadata = metadata_info[chunk_index]
            doc_index = metadata['index']
            chunk_text = metadata['chunk']
            results.append({
                'type': 'content',
                'title': df.iloc[doc_index]['title'],
                'author': df.iloc[doc_index]['author'],
                'content': chunk_text,
                'source': df.iloc[doc_index]['url']
            })
    return results
# Generate an answer based on the retrieved documents
def generate_answer(query):
    context = search(query)
    context_str = "\n\n".join(
        f"Title: {doc['title']}\nContent: {doc.get('content', doc.get('full_text', ''))}"
        for doc in context
    )
    # Set up the ChatOpenAI model with temperature and other parameters
    chat = ChatOpenAI(
        model="gpt-4",
        temperature=0.2,
        max_tokens=1500,
        openai_api_key=openai.api_key
    )
    # Pass the retrieved context and the question as template variables so that
    # literal braces inside the documents are not parsed as placeholders
    messages = [
        SystemMessagePromptTemplate.from_template("You are a helpful assistant."),
        HumanMessagePromptTemplate.from_template(
            "Answer the question based on the context below. "
            'If you can\'t answer the question, answer with "I don\'t know".\n\n'
            "Context: {context}\n\n"
            "Question: {question}"
        )
    ]
    chat_chain = LLMChain(
        llm=chat,
        prompt=ChatPromptTemplate.from_messages(messages)
    )
    # Get the response from the chat model
    response = chat_chain.run(context=context_str, question=query)
    return response.strip()
# Gradio chat interface callback; the extra UI controls (system message,
# max tokens, temperature, top-p) are wired in below but not consumed by
# generate_answer, which uses its own fixed model settings
def respond(message, history, system_message, max_tokens, temperature, top_p):
    response = generate_answer(message)
    yield response
# Gradio demo setup
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
    ],
)
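# Running locally assumes OPENAI_API_KEY is set in the environment and that the
# "Faiss" index directory and the pickled data files sit next to this script.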
if __name__ == "__main__":
    demo.launch()