iShare committed on
Commit
f4c8d5e
1 Parent(s): 81add90

Create main.py

Browse files
Files changed (1) hide show
  1. main.py +52 -0
main.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile
2
+ from pydantic import BaseModel
3
+ from pathlib import Path
4
+ from fastapi import Form
5
+ from fastapi.responses import JSONResponse
6
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
7
+ from PyPDF2 import PdfReader
8
+ from fastapi import Depends
9
+
10
def generate_random_string(length):
    """Return a random string of lowercase ASCII letters.

    Uses the ``random`` module, so the result is fine for naming things but
    must not be treated as a security token.

    Args:
        length: Number of characters to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string of ``length`` characters drawn from
        ``string.ascii_lowercase``.
    """
    # Imported here because the module's import section does not bring in
    # ``random`` or ``string``; without them the first call raises NameError.
    import random
    import string

    letters = string.ascii_lowercase
    return "".join(random.choice(letters) for _ in range(length))
13
+
14
+ app = FastAPI()  # application object; the @app.get / @app.post decorators below register routes on it
15
+
16
class FileToProcess(BaseModel):
    # Request model consumed by the upload endpoint through ``Depends()``.

    # The uploaded file itself; ``File(...)`` declares it as a required file
    # field (the ellipsis means there is no default value).
    uploaded_file: UploadFile = File(...)
18
+
19
@app.get("/")
async def home():
    # Fixed reply served at the root path.
    status_message = "API Working!"
    return status_message
22
+
23
@app.post("/file/upload")
async def upload_file(username: str, file_to_process: FileToProcess = Depends()):
    """Save an uploaded PDF to disk and split its text into overlapping chunks.

    The file is written into a freshly created, randomly named directory
    (relative to the current working directory), its pages are read with
    ``PdfReader``, and the extracted text is split into chunks which are
    printed to stdout.

    Args:
        username: Caller-supplied user name. Kept for interface
            compatibility; it is not used to choose the storage location.
        file_to_process: Dependency-injected model holding the uploaded file.

    Returns:
        A dict with a single ``"INFO"`` message naming the uploaded file.
    """
    uploaded_file = file_to_process.uploaded_file

    # Each upload gets its own randomly named directory. It has to be created
    # before writing: opening a file inside a directory that does not exist
    # raises FileNotFoundError.
    upload_dir = Path(generate_random_string(20))
    upload_dir.mkdir(parents=True, exist_ok=True)

    # The client controls ``filename``; keep only its final path component so
    # values such as "../../etc/passwd" cannot escape the upload directory.
    safe_name = Path(uploaded_file.filename or "").name
    if safe_name in ("", ".", ".."):
        safe_name = "upload"
    file_saved_in_api = upload_dir / safe_name

    # Await the upload's own async ``read`` instead of calling the blocking
    # ``uploaded_file.file.read()`` from inside this coroutine.
    file_saved_in_api.write_bytes(await uploaded_file.read())

    # The processing step starts here.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,  # consecutive chunks share up to 100 characters
        length_function=len,
    )

    doc_reader = PdfReader(str(file_saved_in_api))
    # Pages that yield no text (None or "") are skipped.
    page_texts = (page.extract_text() for page in doc_reader.pages)
    raw_text = "".join(text for text in page_texts if text)
    temp_texts = text_splitter.split_text(raw_text)
    print(temp_texts)

    return {"INFO": f"File '{uploaded_file.filename}' saved to your profile."}