# Spaces status: Sleeping (web-page header residue, not part of the program)
import secrets
import string
from pathlib import Path

from fastapi import FastAPI, File, UploadFile
from fastapi import Form
from fastapi import Depends
from fastapi.responses import JSONResponse
from langchain.text_splitter import RecursiveCharacterTextSplitter
from pydantic import BaseModel
from PyPDF2 import PdfReader
def generate_random_string(length):
    """Return a random string of ``length`` lowercase ASCII letters.

    ``upload_file`` uses the result as the name of a storage directory, so
    the characters are drawn with ``secrets.choice`` rather than the seedable
    ``random`` module.

    Args:
        length: Number of characters to generate. Zero or a negative value
            yields an empty string.

    Returns:
        A string made only of characters from ``string.ascii_lowercase``.
    """
    alphabet = string.ascii_lowercase
    return "".join(secrets.choice(alphabet) for _ in range(length))
# FastAPI application instance for this module.
app = FastAPI()
class FileToProcess(BaseModel):
    """Request model wrapping the single file sent by the client.

    ``upload_file`` receives an instance of this model through ``Depends()``.
    """

    # Required upload: ``File(...)`` declares the field with no default value.
    uploaded_file: UploadFile = File(...)
async def home():
    """Return a fixed string confirming that the API is reachable."""
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file — confirm that it is registered on ``app``.
    return "API Working!"
async def upload_file(username: str, file_to_process: FileToProcess = Depends()):
    """Store an uploaded PDF on disk and split its text into chunks.

    The upload is written into a freshly created directory whose name is a
    20-character random string (relative to the current working directory).
    The saved file is then read with ``PdfReader``, the text of all pages is
    concatenated, and the result is split into chunks of at most 500
    characters with a 100-character overlap. The chunks are printed; they are
    not returned or stored.

    Args:
        username: Accepted for interface compatibility; it is not currently
            used to choose the storage location.
        file_to_process: Model carrying the uploaded file.

    Returns:
        A dict with a single ``"INFO"`` key holding a confirmation message
        that quotes the filename supplied by the client.

    Exceptions raised while writing the file or parsing it as a PDF are not
    caught here and propagate to the caller.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file — confirm that it is registered on ``app``.
    uploaded_file = file_to_process.uploaded_file

    # The filename is client-controlled: keep only its final path component
    # (treating backslashes as separators too) so that it cannot point
    # outside the storage directory.
    raw_name = (uploaded_file.filename or "").replace("\\", "/")
    safe_name = Path(raw_name).name
    if safe_name in ("", ".", ".."):
        safe_name = "upload.pdf"

    # The directory has to exist before the file can be opened for writing.
    storage_dir = Path(generate_random_string(20))
    storage_dir.mkdir(parents=True, exist_ok=True)
    file_saved_in_api = storage_dir / safe_name
    with open(file_saved_in_api, "wb") as file_object:
        file_object.write(uploaded_file.file.read())

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,  # consecutive chunks share up to 100 characters
        length_function=len,
    )

    doc_reader = PdfReader(str(file_saved_in_api))
    # Pages for which no text is extracted contribute nothing.
    page_texts = (page.extract_text() for page in doc_reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    temp_texts = text_splitter.split_text(raw_text)
    print(temp_texts)
    return {"INFO": f"File '{uploaded_file.filename}' saved to your profile."}