"""Serve a local GGML model through llama-cpp-python's OpenAI-compatible
FastAPI server, with the repository README exposed at the root route."""

from llama_cpp.server.app import create_app, Settings
# from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
import os

print("os.cpu_count():", os.cpu_count())

# Build the FastAPI app around a llama.cpp model, using every available
# CPU core for inference; embedding support is disabled to save memory.
app = create_app(
    Settings(
        n_threads=os.cpu_count(),
        model="model/ggmlv3-model.bin",
        embedding=False,
    )
)

# app.mount("/static", StaticFiles(directory="static"), name="static")

@app.get("/", response_class=HTMLResponse)
async def read_items():
    # Serve the README at the root. Note it is returned as-is: the
    # Markdown is not rendered into HTML.
    with open("README.md", "r") as f:
        content = f.read()
    return content

if __name__ == "__main__":
    import uvicorn

    # uvicorn expects an integer port; fall back to sensible defaults
    # when the HOST/PORT environment variables are unset.
    uvicorn.run(
        app,
        host=os.environ.get("HOST", "0.0.0.0"),
        port=int(os.environ.get("PORT", "8000")),
    )
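
# A minimal client sketch, assuming the server is reachable on
# localhost:8000 and that this llama-cpp-python version exposes the
# OpenAI-compatible /v1/completions route (the prompt below is purely
# illustrative):
#
#   import requests
#   r = requests.post(
#       "http://localhost:8000/v1/completions",
#       json={"prompt": "Q: What is 2 + 2? A:", "max_tokens": 16},
#   )
#   print(r.json()["choices"][0]["text"])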