Rauhan committed on
Commit
da75ad8
·
1 Parent(s): 9245bf5

DEBUG: base64 -> plain text

Browse files
Files changed (2) hide show
  1. app.py +17 -10
  2. functions.py +18 -0
app.py CHANGED
@@ -2,9 +2,8 @@ import io
2
  import tempfile
3
  from ipaddress import ip_address
4
  from typing import Optional
5
- import nltk
6
- import jwt
7
  import base64
 
8
  import json
9
  from click import option
10
  from jwt import ExpiredSignatureError, InvalidTokenError
@@ -265,7 +264,7 @@ async def loadPDF(vectorstore: str, pdf: UploadFile = File(...)):
265
  "output": text,
266
  "source": source
267
  }
268
- numTokens = len(" ".join([base64.b64decode(text[x].encode("utf-8")).decode("utf-8") for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
269
  dct = json.dumps(dct, indent=1).encode("utf-8")
270
  fileName = createDataSourceName(sourceName=source)
271
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
@@ -297,7 +296,7 @@ async def loadImagePDF(vectorstore: str, pdf: UploadFile = File(...)):
297
  }
298
  dct = json.dumps(dct, indent=1).encode("utf-8")
299
  fileName = createDataSourceName(sourceName=source)
300
- numTokens = len(" ".join([base64.b64decode(text[x].encode("utf-8")).decode("utf-8") for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
301
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
302
  response = (
303
  supabase.table("ConversAI_ChatbotDataSources")
@@ -321,8 +320,8 @@ class AddText(BaseModel):
321
 
322
  @app.post("/loadText")
323
  async def loadText(addTextConfig: AddText):
324
- trackUsage(vectorstore=vectorstore, endpoint="/loadText")
325
  vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
 
326
  username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
327
  text = cleanText(text = text)
328
  dct = {
@@ -389,7 +388,7 @@ async def loadWebURLs(loadWebsite: LoadWebsite):
389
  "output": text,
390
  "source": source
391
  }
392
- numTokens = len(" ".join([base64.b64decode(text[x].encode("utf-8")).decode("utf-8") for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
393
  dct = json.dumps(dct, indent=1).encode("utf-8")
394
  fileName = createDataSourceName(sourceName=source)
395
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
@@ -472,7 +471,7 @@ async def loadYoutubeTranscript(ytTranscript: YtTranscript):
472
  "output": text,
473
  "source": "www.youtube.com"
474
  }
475
- numTokens = len(" ".join([base64.b64decode(text[x].encode("utf-8")).decode("utf-8") for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
476
  dct = json.dumps(dct, indent=1).encode("utf-8")
477
  fileName = createDataSourceName(sourceName="youtube")
478
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
@@ -531,6 +530,13 @@ async def listChatbotSources(vectorstore: str):
531
  return result
532
 
533
 
 
 
 
 
 
 
 
534
  @app.post("/deleteChatbotSource")
535
  async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
536
  trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
@@ -552,7 +558,8 @@ class LoadEditedJson(BaseModel):
552
  async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
553
  username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
554
  trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
555
- jsonData = json.dumps(loadEditedJsonConfig.jsonData, indent = 1).encode("utf-8")
 
556
  fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
557
  response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
558
  response = (
@@ -608,7 +615,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
608
  content = file["output"]
609
  fileSource = file["source"]
610
  texts.append(".".join(
611
- [base64.b64decode(content[key].encode("utf-8")).decode("utf-8") for key in content.keys()]).replace(
612
  "\n", " "))
613
  sources.append(fileSource)
614
  elif fileType == "/loadText":
@@ -624,7 +631,7 @@ async def trainChatbot(trainChatbotConfig: TrainChatbot):
624
  content = file["output"]
625
  fileSource = file["source"]
626
  texts.append(".".join(
627
- [base64.b64decode(content[key].encode("utf-8")).decode("utf-8") for key in content.keys()]).replace(
628
  "\n", " "))
629
  sources.append(fileSource)
630
  else:
 
2
  import tempfile
3
  from ipaddress import ip_address
4
  from typing import Optional
 
 
5
  import base64
6
+ import jwt
7
  import json
8
  from click import option
9
  from jwt import ExpiredSignatureError, InvalidTokenError
 
264
  "output": text,
265
  "source": source
266
  }
267
+ numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
268
  dct = json.dumps(dct, indent=1).encode("utf-8")
269
  fileName = createDataSourceName(sourceName=source)
270
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
 
296
  }
297
  dct = json.dumps(dct, indent=1).encode("utf-8")
298
  fileName = createDataSourceName(sourceName=source)
299
+ numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
300
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
301
  response = (
302
  supabase.table("ConversAI_ChatbotDataSources")
 
320
 
321
  @app.post("/loadText")
322
  async def loadText(addTextConfig: AddText):
 
323
  vectorstore, text = addTextConfig.vectorstore, addTextConfig.text
324
+ trackUsage(vectorstore=vectorstore, endpoint="/loadText")
325
  username, chatbotName = vectorstore.split("$")[1], vectorstore.split("$")[2]
326
  text = cleanText(text = text)
327
  dct = {
 
388
  "output": text,
389
  "source": source
390
  }
391
+ numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
392
  dct = json.dumps(dct, indent=1).encode("utf-8")
393
  fileName = createDataSourceName(sourceName=source)
394
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
 
471
  "output": text,
472
  "source": "www.youtube.com"
473
  }
474
+ numTokens = len(" ".join([text[x] for x in text]).translate(str.maketrans('', '', string.punctuation)).split(" "))
475
  dct = json.dumps(dct, indent=1).encode("utf-8")
476
  fileName = createDataSourceName(sourceName="youtube")
477
  response = supabase.storage.from_("ConversAI").upload(file=dct, path=f"{fileName}_data.json")
 
530
  return result
531
 
532
 
533
@app.post("/getDataSource")
async def getDataSource(vectorstore: str, sourceUrl: str):
    """Fetch a stored data-source document and return it with string values base64-encoded.

    Args:
        vectorstore: Identifier forwarded to ``trackUsage`` for usage accounting.
        sourceUrl: URL of the JSON document to download.

    Returns:
        The parsed document after ``encodeToBase64`` has been applied to it.

    Raises:
        json.JSONDecodeError: If the response body is not valid JSON.
    """
    trackUsage(vectorstore=vectorstore, endpoint="/getDataSource")
    # NOTE(review): sourceUrl is caller-supplied and fetched as-is, with no
    # host allow-list or timeout, and requests.get blocks the event loop inside
    # this async handler. Consider restricting it to the storage host.
    r = requests.get(sourceUrl)
    # SECURITY: this previously ran eval() on the downloaded body, which lets
    # whoever controls sourceUrl execute arbitrary Python on the server.
    # Parse the body as JSON instead; presumably it is one of the *_data.json
    # files the load endpoints write with json.dumps - TODO confirm.
    return encodeToBase64(json.loads(r.content.decode("utf-8")))
538
+
539
+
540
  @app.post("/deleteChatbotSource")
541
  async def deleteChatbotSource(vectorstore: str, dataSourceName: str):
542
  trackUsage(vectorstore=vectorstore, endpoint="/deleteChatbotSource")
 
558
  async def loadEditedJson(loadEditedJsonConfig: LoadEditedJson):
559
  username, chatbotName = loadEditedJsonConfig.vectorstore.split("$")[1], loadEditedJsonConfig.vectorstore.split("$")[2]
560
  trackUsage(vectorstore=loadEditedJsonConfig.vectorstore, endpoint="/loadEditedJson")
561
+ jsonData = decodeBase64(loadEditedJsonConfig.jsonData)
562
+ jsonData = json.dumps(jsonData, indent = 1).encode("utf-8")
563
  fileName = createDataSourceName(loadEditedJsonConfig.dataSourceName)
564
  response = supabase.storage.from_("ConversAI").upload(file=jsonData, path=f"{fileName}_data.json")
565
  response = (
 
615
  content = file["output"]
616
  fileSource = file["source"]
617
  texts.append(".".join(
618
+ [content[key] for key in content.keys()]).replace(
619
  "\n", " "))
620
  sources.append(fileSource)
621
  elif fileType == "/loadText":
 
631
  content = file["output"]
632
  fileSource = file["source"]
633
  texts.append(".".join(
634
+ [content[key] for key in content.keys()]).replace(
635
  "\n", " "))
636
  sources.append(fileSource)
637
  else:
functions.py CHANGED
@@ -356,6 +356,24 @@ def extractTextFromUrlList(urls):
356
  return {x: y for x, y in zip(urls, texts)}
357
 
358
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
359
  def createDataSourceName(sourceName):
360
  sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
361
  if sourceName not in sources:
 
356
  return {x: y for x, y in zip(urls, texts)}
357
 
358
 
359
def encodeToBase64(dct: dict):
    """Base64-encode every string value of ``dct``, recursing into nested dicts.

    The dictionary is modified in place and the same object is returned, so
    callers that keep a reference to the input see the encoded values too.
    Values that are neither strings nor dicts (numbers, lists, ``None``, ...)
    are left untouched.

    Args:
        dct: Mapping whose string values should be encoded.

    Returns:
        ``dct`` itself, with each string value replaced by the base64 text of
        its UTF-8 encoding.
    """
    for key, value in dct.items():
        # isinstance (rather than an exact type comparison) so that str/dict
        # subclasses such as OrderedDict are processed instead of skipped.
        if isinstance(value, str):
            # Only existing keys are reassigned, so the dict size never
            # changes while it is being iterated.
            dct[key] = base64.b64encode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = encodeToBase64(value)
    return dct
366
+
367
+
368
def decodeBase64(dct: dict):
    """Decode every base64 string value of ``dct``, recursing into nested dicts.

    This is the inverse of ``encodeToBase64``. The dictionary is modified in
    place and the same object is returned. Values that are neither strings
    nor dicts are left untouched.

    Args:
        dct: Mapping whose string values hold base64 text of UTF-8 data.

    Returns:
        ``dct`` itself, with each string value replaced by its decoded text.

    Raises:
        binascii.Error: If a string value is not valid base64.
        UnicodeDecodeError: If the decoded bytes are not valid UTF-8.
    """
    for key, value in dct.items():
        # isinstance (rather than an exact type comparison) so that str/dict
        # subclasses such as OrderedDict are processed instead of skipped.
        if isinstance(value, str):
            # Only existing keys are reassigned, so the dict size never
            # changes while it is being iterated.
            dct[key] = base64.b64decode(value.encode("utf-8")).decode("utf-8")
        elif isinstance(value, dict):
            dct[key] = decodeBase64(value)
    return dct
375
+
376
+
377
  def createDataSourceName(sourceName):
378
  sources = [x["dataSourceName"] for x in client.table("ConversAI_ChatbotDataSources").select("dataSourceName").execute().data]
379
  if sourceName not in sources: