Spaces:
Sleeping
Sleeping
UPDATE: loadPDF
Browse files- app.py +19 -2
- functions.py +12 -1
- secrets.env +2 -1
app.py
CHANGED
@@ -2,6 +2,7 @@ import io
|
|
2 |
import tempfile
|
3 |
import jwt
|
4 |
import base64
|
|
|
5 |
from click import option
|
6 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
7 |
from starlette import status
|
@@ -251,18 +252,34 @@ async def newChatbot(chatbotName: str, username: str):
|
|
251 |
@app.post("/loadPDF")
|
252 |
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
|
253 |
source = pdf.filename
|
|
|
254 |
pdf = await pdf.read()
|
255 |
with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
|
256 |
temp_file.write(pdf)
|
257 |
temp_file_path = temp_file.name
|
258 |
text = extractTextFromPdf(temp_file_path)
|
259 |
os.remove(temp_file_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
260 |
return {
|
261 |
-
"output":
|
262 |
-
"source": source
|
263 |
}
|
264 |
|
265 |
|
|
|
266 |
@app.post("/loadImagePDF")
|
267 |
async def returnText(pdf: UploadFile = File(...)):
|
268 |
source = pdf.filename
|
|
|
2 |
import tempfile
|
3 |
import jwt
|
4 |
import base64
|
5 |
+
import json
|
6 |
from click import option
|
7 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
8 |
from starlette import status
|
|
|
252 |
@app.post("/loadPDF")
async def addPDFData(vectorstore: str, pdf: UploadFile = File(...)):
    """Extract the text of an uploaded PDF and register it as a chatbot data source.

    The extracted text is serialized as ``{"output": <text>}`` JSON, uploaded to
    the ``ConversAI`` storage bucket as ``<dataSourceName>_data.json``, and a row
    describing it is inserted into ``ConversAI_ChatbotDataSources``.

    Args:
        vectorstore: ``$``-separated identifier; the second and third fields are
            read as the username and the chatbot name.
        pdf: The uploaded PDF file.

    Returns:
        ``{"output": "SUCCESS"}`` once the upload and the insert have been issued.

    Raises:
        HTTPException: 400 if ``vectorstore`` has fewer than three ``$`` fields.
    """
    from fastapi import HTTPException

    source = pdf.filename

    # Split once and validate, instead of letting a malformed identifier
    # surface as an IndexError (HTTP 500).
    parts = vectorstore.split("$")
    if len(parts) < 3:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="vectorstore must contain at least three '$'-separated fields",
        )
    username, chatbotName = parts[1], parts[2]

    pdfBytes = await pdf.read()
    with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
        temp_file.write(pdfBytes)
        temp_file_path = temp_file.name
    try:
        text = extractTextFromPdf(temp_file_path)
    finally:
        # Always remove the temp file, even when extraction fails.
        os.remove(temp_file_path)

    # Upload raw bytes: a plain ``str`` handed to the storage client is
    # interpreted as a local file path rather than as the content itself.
    payload = json.dumps({"output": text}, indent=1).encode("utf-8")
    fileName = createDataSourceName(sourceName=source)
    objectPath = f"{fileName}_data.json"
    supabase.storage.from_("ConversAI").upload(file=payload, path=objectPath)

    # Build the public URL with "/" explicitly; os.path.join is for filesystem
    # paths and would use the platform separator.
    baseUrl = os.environ["SUPABASE_PUBLIC_BASE_URL"].rstrip("/")
    (
        supabase.table("ConversAI_ChatbotDataSources")
        .insert({
            "username": username,
            "chatbotName": chatbotName,
            "dataSourceName": fileName,
            # NOTE(review): stored value is a backslash followed by "loadPDF",
            # kept as-is for compatibility with existing rows; the route itself
            # is "/loadPDF" -- confirm which one consumers expect.
            "sourceEndpoint": "\\loadPDF",
            "sourceContentURL": f"{baseUrl}/{objectPath}",
        })
        .execute()
    )
    return {
        "output": "SUCCESS"
    }
|
280 |
|
281 |
|
282 |
+
|
283 |
@app.post("/loadImagePDF")
|
284 |
async def returnText(pdf: UploadFile = File(...)):
|
285 |
source = pdf.filename
|
functions.py
CHANGED
@@ -347,4 +347,15 @@ def extractTextFromUrl(url):
|
|
347 |
def extractTextFromUrlList(urls):
    """Extract text from every URL in ``urls`` using a thread pool.

    Returns a dict that maps each URL to the value ``extractTextFromUrl``
    produced for it.
    """
    with ThreadPoolExecutor() as pool:
        extracted = list(pool.map(extractTextFromUrl, urls))
    # Pair each URL with its result, in input order.
    return dict(zip(urls, extracted))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
347 |
def extractTextFromUrlList(urls):
    """Extract text from every URL in ``urls`` using a thread pool.

    Returns a dict that maps each URL to the value ``extractTextFromUrl``
    produced for it.
    """
    with ThreadPoolExecutor() as pool:
        extracted = list(pool.map(extractTextFromUrl, urls))
    # Pair each URL with its result, in input order.
    return dict(zip(urls, extracted))
|
351 |
+
|
352 |
+
|
353 |
+
def createDataSourceName(sourceName):
    """Return a data-source name that is not yet used in ``ConversAI_ChatbotDataSources``.

    If ``sourceName`` is free it is returned unchanged. Otherwise the first free
    name of the form ``<sourceName>-<n>`` (n = 1, 2, 3, ...) is returned.

    The table is queried once and candidates are checked against an in-memory
    set, rather than recursing and re-querying for every collision.

    Args:
        sourceName: The preferred name, e.g. the uploaded file's name.

    Returns:
        A name not present in the ``dataSourceName`` column at query time.
    """
    rows = (
        client.table("ConversAI_ChatbotDataSources")
        .select("dataSourceName")
        .execute()
        .data
    )
    taken = {row["dataSourceName"] for row in rows}
    if sourceName not in taken:
        return sourceName

    # Probe numbered suffixes until a free one is found.
    suffix = 1
    while f"{sourceName}-{suffix}" in taken:
        suffix += 1
    return f"{sourceName}-{suffix}"
|
secrets.env
CHANGED
@@ -6,4 +6,5 @@ QDRANT_API_KEY=k0V8kKNulQdRLukhYy03kJcncctoDImbiPHgmvaEEsup8MwTjqgT0w
|
|
6 |
COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
|
7 |
NLTK_DATA=/app/nltk_data
|
8 |
LANGCHAIN_API_KEY=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
|
9 |
-
LANGCHAIN_TRACING_V2=true
|
|
|
|
6 |
COHERE_API_KEY=lCu3rZEjcUPAt0RsdQpQlGtgYp1uKAmuNIBdjFKq
|
7 |
NLTK_DATA=/app/nltk_data
|
8 |
LANGCHAIN_API_KEY=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
|
9 |
+
LANGCHAIN_TRACING_V2=true
|
10 |
+
SUPABASE_PUBLIC_BASE_URL=https://lvuhhlrkcuexzqtsbqyu.supabase.co/storage/v1/object/public/ConversAI
|