Rauhan committed on
Commit
a8a5b30
·
1 Parent(s): f072aa1

UPDATE: loadPDF

Browse files
Files changed (3) hide show
  1. app.py +19 -2
  2. functions.py +12 -1
  3. secrets.env +2 -1
app.py CHANGED
@@ -2,6 +2,7 @@ import io
2
  import tempfile
3
  import jwt
4
  import base64
 
5
  from click import option
6
  from jwt import ExpiredSignatureError, InvalidTokenError
7
  from starlette import status
@@ -251,18 +252,34 @@ async def newChatbot(chatbotName: str, username: str):
@app.post("/loadPDF")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and return it with its file name.

    Args:
        vectorstore: Identifier supplied by the caller; this version of the
            handler does not read it.
        pdf: The uploaded PDF file.

    Returns:
        A dict with ``"output"`` (the value returned by
        ``extractTextFromPdf``) and ``"source"`` (the uploaded file's name).
    """
    source = pdf.filename
    content = await pdf.read()
    # extractTextFromPdf is called with a filesystem path, so the upload is
    # written to a named temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        temp_file.write(content)
        temp_file_path = temp_file.name
    try:
        text = extractTextFromPdf(temp_file_path)
    finally:
        # delete=False leaves cleanup to us; remove the file even when
        # extraction raises so failed uploads do not pile up on disk.
        os.remove(temp_file_path)
    return {
        "output": text,
        "source": source
    }
264
 
265
 
 
266
  @app.post("/loadImagePDF")
267
  async def returnText(pdf: UploadFile = File(...)):
268
  source = pdf.filename
 
2
  import tempfile
3
  import jwt
4
  import base64
5
+ import json
6
  from click import option
7
  from jwt import ExpiredSignatureError, InvalidTokenError
8
  from starlette import status
 
@app.post("/loadPDF")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract an uploaded PDF's text, store it in Supabase and register it.

    The extracted text is serialized as JSON, uploaded to the ``ConversAI``
    storage bucket, and a row describing the new data source is inserted into
    the ``ConversAI_ChatbotDataSources`` table.

    Args:
        vectorstore: ``$``-separated identifier; its second and third fields
            are read as the username and the chatbot name.
        pdf: The uploaded PDF file.

    Returns:
        ``{"output": "SUCCESS"}`` once the upload and the insert have run.

    Raises:
        IndexError: If ``vectorstore`` has fewer than three ``$``-separated
            fields.
        KeyError: If ``SUPABASE_PUBLIC_BASE_URL`` is not set in the
            environment.
    """
    source = pdf.filename
    # Split once instead of re-splitting for every field.
    parts = vectorstore.split("$")
    username, chatbotName = parts[1], parts[2]

    content = await pdf.read()
    # extractTextFromPdf is called with a filesystem path, so the upload is
    # written to a named temporary file first.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        temp_file.write(content)
        temp_file_path = temp_file.name
    try:
        text = extractTextFromPdf(temp_file_path)
    finally:
        # delete=False leaves cleanup to us; remove the file even when
        # extraction raises so failed uploads do not pile up on disk.
        os.remove(temp_file_path)

    payload = json.dumps({"output": text}, indent = 1)
    fileName = createDataSourceName(sourceName = source)
    objectPath = f"{fileName}_data.json"
    # Upload the JSON as bytes: the storage client interprets a plain str
    # as a local file path to open, not as the content to store.
    supabase.storage.from_("ConversAI").upload(file=payload.encode("utf-8"), path=objectPath)

    # Join URL segments with "/" explicitly; os.path.join follows the host
    # OS path rules and drops the base when the name starts with "/".
    baseUrl = os.environ["SUPABASE_PUBLIC_BASE_URL"].rstrip("/")
    (
        supabase.table("ConversAI_ChatbotDataSources")
        .insert({"username": username,
                 "chatbotName": chatbotName,
                 "dataSourceName": fileName,
                 # Same stored value as before (backslash + "loadPDF"), now
                 # written without the invalid "\l" escape sequence.
                 # NOTE(review): the route itself is "/loadPDF" - confirm
                 # whether a forward slash was intended here.
                 "sourceEndpoint": "\\loadPDF",
                 "sourceContentURL": f"{baseUrl}/{objectPath}"})
        .execute()
    )
    return {
        "output": "SUCCESS"
    }
280
 
281
 
282
+
283
  @app.post("/loadImagePDF")
284
  async def returnText(pdf: UploadFile = File(...)):
285
  source = pdf.filename
functions.py CHANGED
@@ -347,4 +347,15 @@ def extractTextFromUrl(url):
def extractTextFromUrlList(urls):
    """Run ``extractTextFromUrl`` over every URL using a thread pool.

    Returns a dict mapping each URL to the value ``extractTextFromUrl``
    produced for it; when a URL occurs more than once, the last result wins.
    """
    with ThreadPoolExecutor() as pool:
        # Materialize inside the ``with`` so all results are collected
        # before the pool shuts down.
        extracted = list(pool.map(extractTextFromUrl, urls))
    return dict(zip(urls, extracted))
 
 
 
 
 
 
 
 
 
 
 
 
def extractTextFromUrlList(urls):
    """Run ``extractTextFromUrl`` over every URL using a thread pool.

    Returns a dict mapping each URL to the value ``extractTextFromUrl``
    produced for it; when a URL occurs more than once, the last result wins.
    """
    with ThreadPoolExecutor() as pool:
        # Materialize inside the ``with`` so all results are collected
        # before the pool shuts down.
        extracted = list(pool.map(extractTextFromUrl, urls))
    return dict(zip(urls, extracted))
351
+
352
+
def createDataSourceName(sourceName):
    """Return a data source name not yet present in the data sources table.

    Reads every ``dataSourceName`` from ``ConversAI_ChatbotDataSources`` once.
    If ``sourceName`` is unused it is returned unchanged; otherwise the first
    free name of the form ``<sourceName>-<n>`` (n = 1, 2, 3, ...) is returned.

    Args:
        sourceName: The desired name, e.g. the uploaded file's name.

    Returns:
        ``sourceName`` itself, or ``sourceName`` with a numeric suffix.
    """
    rows = client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data
    # A set gives O(1) membership checks while probing candidate names.
    taken = {row["dataSourceName"] for row in rows}
    if sourceName not in taken:
        return sourceName
    # Increment the suffix instead of recursing: recursion re-queried the
    # table on every collision and stacked suffixes ("name-1-1-1").
    suffix = 1
    while f"{sourceName}-{suffix}" in taken:
        suffix += 1
    return f"{sourceName}-{suffix}"
secrets.env CHANGED
@@ -6,4 +6,5 @@ QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
6
  COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
7
  NLTK_DATA=/app/nltk_data
8
  LANGCHAIN_API_KEY=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
9
- LANGCHAIN_TRACING_V2=true
 
 
6
  COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
7
  NLTK_DATA=/app/nltk_data
8
  LANGCHAIN_API_KEY=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
9
+ LANGCHAIN_TRACING_V2=true
10
+ SUPABASE_PUBLIC_BASE_URL=https://lvuhhlrkcuexzqtsbqyu.supabase.co/storage/v1/object/public/ConversAI