Spaces:
Sleeping
Sleeping
from fastapi import FastAPI, File, UploadFile | |
from pydantic import BaseModel | |
from pathlib import Path | |
from fastapi import Form | |
from fastapi.responses import JSONResponse | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from PyPDF2 import PdfReader | |
from fastapi import Depends | |
# Module-level FastAPI application object.
# NOTE(review): no route decorators are visible on the handlers in this file —
# confirm how (and under which paths) they are registered on `app`.
app = FastAPI()
class FileToProcess(BaseModel):
    """Request model wrapping the single file sent with an upload request.

    ``upload_file`` receives an instance of this model through ``Depends()``
    and reads the upload from ``uploaded_file``.
    """

    # Required file field: ``File(...)`` (Ellipsis default) marks it mandatory.
    uploaded_file: UploadFile = File(...)
async def home():
    """Health-check handler: return a fixed string showing the API is up."""
    status_message = "API Working!"
    return status_message
def _require_path_component(raw, field, *, take_basename=False):
    """Return *raw* as a single, safe path component or raise HTTP 400.

    Args:
        raw: Client-supplied value (may be ``None`` or empty).
        field: Human-readable field name used in the error message.
        take_basename: When true, silently reduce *raw* to its final path
            segment (clients may send a full path as the file name). When
            false, *raw* must already be a bare component.

    Raises:
        fastapi.HTTPException: 400 if the value is empty, is ``.``/``..``,
            contains a NUL byte, or (strict mode) contains path separators.
    """
    from fastapi import HTTPException

    candidate = Path(raw or "").name
    is_unusable = candidate in {"", ".", ".."} or "\x00" in candidate
    # In strict mode any difference means *raw* carried separators or dot
    # segments; mapping it to its basename could target another directory.
    changed_in_strict_mode = not take_basename and candidate != raw
    if is_unusable or changed_in_strict_mode:
        raise HTTPException(status_code=400, detail=f"Invalid {field}.")
    return candidate


# NOTE(review): no route decorator is visible on this handler — confirm how it
# is registered on the application.
async def upload_file(username: str, file_to_process: FileToProcess = Depends()):
    """Save an uploaded PDF under the user's folder and split its text.

    The file is stored at ``<home>/<username>/saved_files/<filename>``, then
    read back with ``PdfReader``; the extracted text of all pages is
    concatenated and split into overlapping chunks, which are printed.

    Args:
        username: Name of the per-user directory; must be a single path
            component (no separators, not ``.``/``..``).
        file_to_process: Dependency model carrying the uploaded file.

    Returns:
        A dict with an ``"INFO"`` message naming the saved file.

    Raises:
        fastapi.HTTPException: 400 when ``username`` or the upload's file name
            cannot be used safely as a path component.
    """
    import shutil

    uploaded_file = file_to_process.uploaded_file

    # Both values come from the client; validate them before touching the
    # filesystem so "../" segments cannot escape the storage directory.
    user_dir = _require_path_component(username, "username")
    safe_name = _require_path_component(
        uploaded_file.filename, "file name", take_basename=True
    )

    path_to_save_file = Path.home() / user_dir / "saved_files"
    path_to_save_file.mkdir(parents=True, exist_ok=True)
    file_location = path_to_save_file / safe_name

    # Stream to disk in chunks instead of loading the whole upload in memory.
    with open(file_location, "wb") as file_object:
        shutil.copyfileobj(uploaded_file.file, file_object)

    # Processing step: extract the PDF text and split it into chunks.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=500,
        chunk_overlap=100,  # consecutive chunks share 100 characters
        length_function=len,
    )

    doc_reader = PdfReader(str(file_location))
    # Pages without extractable text yield a falsy value; skip them.
    page_texts = (page.extract_text() for page in doc_reader.pages)
    raw_text = "".join(text for text in page_texts if text)

    temp_texts = text_splitter.split_text(raw_text)
    # The chunks are only printed here, exactly as before; nothing else
    # consumes them in this handler.
    print(temp_texts)

    return {"INFO": f"File '{safe_name}' saved to your profile."}