Adipta commited on
Commit
5cff118
·
verified ·
1 Parent(s): 4bdf692

Update pdfchatbot.py

Browse files
Files changed (1) hide show
  1. pdfchatbot.py +6 -37
pdfchatbot.py CHANGED
@@ -6,16 +6,15 @@ import weaviate
6
  import os
7
  from PIL import Image
8
  from config import MODEL_CONFIG
9
- from langchain_community.embeddings import HuggingFaceEmbeddings
10
  from langchain_weaviate.vectorstores import WeaviateVectorStore
11
  from langchain.text_splitter import CharacterTextSplitter
12
- from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
13
  from langchain.chains import ConversationalRetrievalChain
14
  from langchain_community.document_loaders import PyPDFLoader
15
  from langchain.prompts import PromptTemplate
16
- from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
17
 
18
  os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")
 
19
 
20
  class PDFChatBot:
21
  def __init__(self):
@@ -66,7 +65,7 @@ class PDFChatBot:
66
  """
67
  Load embeddings from Hugging Face and set in the config file.
68
  """
69
- self.embeddings = HuggingFaceEmbeddings(model_name=MODEL_CONFIG.MODEL_EMBEDDINGS)
70
 
71
  def load_vectordb(self):
72
  """
@@ -82,42 +81,15 @@ class PDFChatBot:
82
 
83
  self.vectordb = WeaviateVectorStore.from_documents(docs, self.embeddings, client=weaviate_client)
84
 
85
- def load_tokenizer(self):
86
- """
87
- Load the tokenizer from Hugging Face and set in the config file.
88
- """
89
- self.tokenizer = AutoTokenizer.from_pretrained(MODEL_CONFIG.AUTO_TOKENIZER, token=os.getenv("HUGGINGFACE_API_TOKEN"))
90
-
91
- def load_model(self):
92
- """
93
- Load the causal language model from Hugging Face and set in the config file.
94
- """
95
- self.model = AutoModelForCausalLM.from_pretrained(
96
- MODEL_CONFIG.MODEL_LLM,
97
- device_map='auto',
98
- torch_dtype=torch.float32,
99
- token=os.getenv("HUGGINGFACE_API_TOKEN"),
100
- load_in_8bit=False
101
- )
102
-
103
- def create_pipeline(self):
104
- """
105
- Create a pipeline for text generation using the loaded model and tokenizer.
106
- """
107
- pipe = pipeline(
108
- model=self.model,
109
- task='text-generation',
110
- tokenizer=self.tokenizer,
111
- max_new_tokens=200
112
- )
113
- self.pipeline = HuggingFacePipeline(pipeline=pipe)
114
 
115
  def create_chain(self):
116
  """
117
  Create a Conversational Retrieval Chain
118
  """
 
 
119
  self.chain = ConversationalRetrievalChain.from_llm(
120
- self.pipeline,
121
  chain_type="stuff",
122
  retriever=self.vectordb.as_retriever(search_kwargs={"k": 1}),
123
  condense_question_prompt=self.prompt,
@@ -135,9 +107,6 @@ class PDFChatBot:
135
  self.documents = PyPDFLoader(file.name).load()
136
  self.load_embeddings()
137
  self.load_vectordb()
138
- self.load_tokenizer()
139
- self.load_model()
140
- self.create_pipeline()
141
  self.create_chain()
142
 
143
  def generate_response(self, history, query, file):
 
6
  import os
7
  from PIL import Image
8
  from config import MODEL_CONFIG
9
+ from langchain_openai import OpenAIEmbeddings
10
  from langchain_weaviate.vectorstores import WeaviateVectorStore
11
  from langchain.text_splitter import CharacterTextSplitter
 
12
  from langchain.chains import ConversationalRetrievalChain
13
  from langchain_community.document_loaders import PyPDFLoader
14
  from langchain.prompts import PromptTemplate
 
15
 
16
  os.environ["HUGGINGFACE_API_TOKEN"] = os.getenv("HUGGINGFACE_API_TOKEN")
17
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
18
 
19
  class PDFChatBot:
20
  def __init__(self):
 
65
  """
66
  Load embeddings from Hugging Face and set in the config file.
67
  """
68
+ self.embeddings = OpenAIEmbeddings(model=MODEL_CONFIG.MODEL_EMBEDDINGS)
69
 
70
  def load_vectordb(self):
71
  """
 
81
 
82
  self.vectordb = WeaviateVectorStore.from_documents(docs, self.embeddings, client=weaviate_client)
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
 
85
  def create_chain(self):
86
  """
87
  Create a Conversational Retrieval Chain
88
  """
89
+ llm = OpenAI(openai_api_key=os.getenv("OPENAI_API_KEY"))
90
+
91
  self.chain = ConversationalRetrievalChain.from_llm(
92
+ llm,
93
  chain_type="stuff",
94
  retriever=self.vectordb.as_retriever(search_kwargs={"k": 1}),
95
  condense_question_prompt=self.prompt,
 
107
  self.documents = PyPDFLoader(file.name).load()
108
  self.load_embeddings()
109
  self.load_vectordb()
 
 
 
110
  self.create_chain()
111
 
112
  def generate_response(self, history, query, file):