# NOTE: the lines below were a Hugging Face Spaces status banner ("Spaces: Sleeping")
# captured by the page scrape — they are not part of the program.
# Shell setup command (run in a terminal / notebook cell, not as Python):
# pip install -qU langchain-community faiss-cpu faiss-gpu langchain-openai sentence_transformers gradio
# Standard library
import os
from uuid import uuid4

# Third-party
import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# LangChain
from langchain import PromptTemplate
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain_community.docstore.in_memory import InMemoryDocstore
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
# Load the pre-cleaned news dataset; skip malformed CSV rows rather than aborting.
df = pd.read_csv('news_paper-Cleaned.csv', encoding='utf-8', on_bad_lines='skip')

# SECURITY: an OpenAI API key was previously hardcoded here (a leaked secret).
# Read it from the environment instead, and fail fast with a clear message.
if not os.environ.get("OPENAI_API_KEY"):
    raise EnvironmentError(
        "OPENAI_API_KEY is not set. Export it before running this script."
    )

embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

# Probe the embedding dimensionality once so the FAISS index width matches it.
index = faiss.IndexFlatL2(len(embeddings.embed_query("hello world")))

# Empty FAISS store backed by an in-memory docstore; documents are added later.
vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
)
# One dict per article holding the fields we want to be searchable.
documents = [
    {
        'title': row['title'],
        'author': row['author'],
        'description': row['description'],
        'full_text': row['full_text'],
    }
    for _, row in df.iterrows()
]

# Render each article as plain "field: value" lines. The original used
# str(doc), which embedded Python dict-repr noise (braces, quotes) into the
# text that gets vectorized. Title/author are also kept in metadata so
# retrieved chunks can be attributed; metadata["source"] is unchanged.
full_text = [
    Document(
        page_content="\n".join(f"{key}: {value}" for key, value in doc.items()),
        metadata={
            "source": "news",
            "title": str(doc['title']),
            "author": str(doc['author']),
        },
    )
    for doc in documents
]
# Chunk each article into ~1000-character pieces with 100 characters of
# overlap so context is preserved across chunk boundaries.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    is_separator_regex=False,
)
text_split = splitter.split_documents(full_text)

# Give every chunk a random unique id, then index all chunks at once.
uuids = [str(uuid4()) for _ in text_split]
vector_store.add_documents(documents=text_split, ids=uuids)

# Max-marginal-relevance search: return the 10 most relevant-yet-diverse chunks.
retriever = vector_store.as_retriever(search_type="mmr", search_kwargs={"k": 10})
def questions(query):
    """Answer ``query`` with retrieval-augmented generation over the news index.

    Uses the module-level ``retriever`` (top-10 MMR chunks) as context for a
    "stuff"-style RetrievalQA chain and returns the LLM's answer string.

    Args:
        query: The user's natural-language question.

    Returns:
        The generated answer text (``result`` field of the chain output).
    """
    # Prompt fixed from the original, which contained typos
    # ("that that", "spilt", "You have answer only").
    template = """
    You are a helpful assistant that can answer questions about specific data.
    You must answer only from this Context.
    You will receive 10 answers; return all of them, separated by new lines.
    Question: {question}
    Context: {context}
    Answer:
    """
    prompt = PromptTemplate(template=template, input_variables=['question', 'context'])

    # NOTE: the chain is rebuilt on every call for simplicity; hoist it to
    # module level if per-request latency becomes a concern.
    qa_chain = RetrievalQA.from_chain_type(
        llm=OpenAI(),
        retriever=retriever,
        chain_type_kwargs={"prompt": prompt},
    )
    return qa_chain({"query": query})['result']
# Simple text-in / text-out UI around the QA function.
demo = gr.Interface(fn=questions, inputs="text", outputs="text")

# Launch only when executed as a script, so importing this module (e.g. for
# testing or reuse) does not start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()