Spaces:
Sleeping
Sleeping
DEBUG: base64 -> plain text
Browse files- app.py +17 -10
- functions.py +18 -0
app.py
CHANGED
@@ -2,9 +2,8 @@ import io
|
|
2 |
import tempfile
|
3 |
from ipaddress import ip_address
|
4 |
from typing import Optional
|
5 |
-
import nltk
|
6 |
-
import jwt
|
7 |
import base64
|
|
|
8 |
import json
|
9 |
from click import option
|
10 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
@@ -265,7 +264,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
265 |
"output": text,
|
266 |
"source": source
|
267 |
}
|
268 |
-
numTokens = len(" ".join([
|
269 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
270 |
fileName = createDataSourceName(sourceName=source)
|
271 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
@@ -297,7 +296,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
|
|
297 |
}
|
298 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
299 |
fileName = createDataSourceName(sourceName=source)
|
300 |
-
numTokens = len(" ".join([
|
301 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
302 |
response = (
|
303 |
supabase.table("ConversAI_ChatbotDataSources")
|
@@ -321,8 +320,8 @@ class AddText(BaseModel):
|
|
321 |
|
322 |
@app.post("/loadText")
|
323 |
async def loadText(addTextConfig: AddText):
|
324 |
-
trackUsage(vectorstore=vectorstore, endpoint="/loadText")
|
325 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
|
|
326 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
327 |
text = cleanText(text = text)
|
328 |
dct = {
|
@@ -389,7 +388,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
|
|
389 |
"output": text,
|
390 |
"source": source
|
391 |
}
|
392 |
-
numTokens = len(" ".join([
|
393 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
394 |
fileName = createDataSourceName(sourceName=source)
|
395 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
@@ -472,7 +471,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
|
|
472 |
"output": text,
|
473 |
"source": "www.youtube.com"
|
474 |
}
|
475 |
-
numTokens = len(" ".join([
|
476 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
477 |
fileName = createDataSourceName(sourceName="youtube")
|
478 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
@@ -531,6 +530,13 @@ async def listChatbotSources(vectorstore: str):
|
|
531 |
return result
|
532 |
|
533 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
534 |
@app.post("/deleteChatbotSource")
|
535 |
async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
|
536 |
trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
|
@@ -552,7 +558,8 @@ class LoadEditedJson(BaseModel):
|
|
552 |
async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
553 |
username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
|
554 |
trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
|
555 |
-
jsonData =
|
|
|
556 |
fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
|
557 |
response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
|
558 |
response = (
|
@@ -608,7 +615,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
608 |
content = file["output"]
|
609 |
fileSource = file["source"]
|
610 |
texts.append(".".join(
|
611 |
-
[
|
612 |
"\n", " "))
|
613 |
sources.append(fileSource)
|
614 |
elif fileType == "/loadText":
|
@@ -624,7 +631,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
|
|
624 |
content = file["output"]
|
625 |
fileSource = file["source"]
|
626 |
texts.append(".".join(
|
627 |
-
[
|
628 |
"\n", " "))
|
629 |
sources.append(fileSource)
|
630 |
else:
|
|
|
2 |
import tempfile
|
3 |
from ipaddress import ip_address
|
4 |
from typing import Optional
|
|
|
|
|
5 |
import base64
|
6 |
+
import jwt
|
7 |
import json
|
8 |
from click import option
|
9 |
from jwt import ExpiredSignatureError, InvalidTokenError
|
|
|
264 |
"output": text,
|
265 |
"source": source
|
266 |
}
|
267 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
268 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
269 |
fileName = createDataSourceName(sourceName=source)
|
270 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
296 |
}
|
297 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
298 |
fileName = createDataSourceName(sourceName=source)
|
299 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
300 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
301 |
response = (
|
302 |
supabase.table("ConversAI_ChatbotDataSources")
|
|
|
320 |
|
321 |
@app.post("/loadText")
|
322 |
async def loadText(addTextConfig: AddText):
|
|
|
323 |
vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
|
324 |
+
trackUsage(vectorstore=vectorstore, endpoint="/loadText")
|
325 |
username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
|
326 |
text = cleanText(text = text)
|
327 |
dct = {
|
|
|
388 |
"output": text,
|
389 |
"source": source
|
390 |
}
|
391 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
392 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
393 |
fileName = createDataSourceName(sourceName=source)
|
394 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
471 |
"output": text,
|
472 |
"source": "www.youtube.com"
|
473 |
}
|
474 |
+
numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
|
475 |
dct = json.dumps(dct, indent=1).encode("utf-8")
|
476 |
fileName = createDataSourceName(sourceName="youtube")
|
477 |
response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
|
|
|
530 |
return result
|
531 |
|
532 |
|
533 |
+
@app.post("/getDataSource")
async def getDataSource(vectorstore: str, sourceUrl: str):
    """Fetch a data-source document by URL and return it with strings base64-encoded.

    Args:
        vectorstore: Vectorstore identifier, forwarded to ``trackUsage``.
        sourceUrl: URL of the document to download.

    Returns:
        The parsed document after ``encodeToBase64`` has been applied to it.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
    # NOTE(review): sourceUrl is caller-controlled and fetched as-is; consider
    # restricting it to the expected storage host to avoid SSRF - confirm.
    r = requests.get(sourceUrl)
    # SECURITY: this previously ran eval() on the downloaded body, which
    # executes arbitrary Python supplied by whoever controls sourceUrl.
    # json.loads parses data only, and also accepts true/false/null, which
    # eval() rejected. Presumably the body is one of the "*_data.json" files
    # this service uploads via json.dumps - verify against callers.
    return encodeToBase64(json.loads(r.content.decode("utf-8")))
|
538 |
+
|
539 |
+
|
540 |
@app.post("/deleteChatbotSource")
|
541 |
async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
|
542 |
trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
|
|
|
558 |
async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
|
559 |
username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
|
560 |
trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
|
561 |
+
jsonData = decodeBase64(loadEditedJsonConfig.jsonData)
|
562 |
+
jsonData = json.dumps(jsonData, indent = 1).encode("utf-8")
|
563 |
fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
|
564 |
response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
|
565 |
response = (
|
|
|
615 |
content = file["output"]
|
616 |
fileSource = file["source"]
|
617 |
texts.append(".".join(
|
618 |
+
[content[key] for key in content.keys()]).replace(
|
619 |
"\n", " "))
|
620 |
sources.append(fileSource)
|
621 |
elif fileType == "/loadText":
|
|
|
631 |
content = file["output"]
|
632 |
fileSource = file["source"]
|
633 |
texts.append(".".join(
|
634 |
+
[content[key] for key in content.keys()]).replace(
|
635 |
"\n", " "))
|
636 |
sources.append(fileSource)
|
637 |
else:
|
functions.py
CHANGED
@@ -356,6 +356,24 @@ def extractTextFromUrlList(urls):
|
|
356 |
return {x: y for x, y in zip(urls, texts)}
|
357 |
|
358 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
359 |
def createDataSourceName(sourceName):
|
360 |
sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
|
361 |
if sourceName not in sources:
|
|
|
356 |
return {x: y for x, y in zip(urls, texts)}
|
357 |
|
358 |
|
359 |
+
def encodeToBase64(dct: dict):
    """Base64-encode every string value of ``dct``, recursing into nested dicts.

    The dictionary is modified in place and the same object is returned.
    Values that are neither ``str`` nor ``dict`` (numbers, ``None``, lists,
    ...) are left untouched; keys are never encoded.

    Args:
        dct: Mapping whose string values should be encoded.

    Returns:
        ``dct`` itself, with each string value replaced by the base64 text of
        its UTF-8 bytes.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for key, value in dct.items():
        # isinstance (rather than ``type(x) == T``) also covers str/dict
        # subclasses, which the exact-type comparison silently skipped.
        if isinstance(value, str):
            dct[key] = base64.b64encode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = encodeToBase64(value)
    return dct
|
366 |
+
|
367 |
+
|
368 |
+
def decodeBase64(dct: dict):
    """Decode every base64 string value of ``dct``, recursing into nested dicts.

    The dictionary is modified in place and the same object is returned.
    Values that are neither ``str`` nor ``dict`` are left untouched; keys are
    never decoded.

    Args:
        dct: Mapping whose string values hold base64 text of UTF-8 data.

    Returns:
        ``dct`` itself, with each string value replaced by its decoded text.

    Raises:
        binascii.Error: If a string value is not correctly padded base64.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    # Only existing keys are reassigned, so the dict's size never changes
    # while it is being iterated.
    for key, value in dct.items():
        # isinstance (rather than ``type(x) == T``) also covers str/dict
        # subclasses, which the exact-type comparison silently skipped.
        if isinstance(value, str):
            dct[key] = base64.b64decode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = decodeBase64(value)
    return dct
|
375 |
+
|
376 |
+
|
377 |
def createDataSourceName(sourceName):
|
378 |
sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
|
379 |
if sourceName not in sources:
|