from llama_cpp.server.app import create_app, Settings
# from fastapi.staticfiles import StaticFiles
from fastapi.responses import HTMLResponse
import os

# Log the available core count; it is also used for n_threads below.
print("os.cpu_count()", os.cpu_count())
app = create_app(
    Settings(
        n_threads=os.cpu_count(),        # use every available core for inference
        model="model/ggmlv3-model.bin",  # path to the GGML v3 model file
        embedding=False                  # disable the embeddings endpoint
    )
)
# app.mount("/static", StaticFiles(directory="static"), name="static")

# Serve the README at the root path as HTML (the route decorator was missing,
# which left this handler unregistered and the HTMLResponse import unused).
@app.get("/", response_class=HTMLResponse)
async def read_items():
    with open("README.md", "r") as f:
        content = f.read()
    return content
if __name__ == "__main__":
    import uvicorn

    # Environment variables are strings, so the port must be cast to int.
    uvicorn.run(app, host=os.environ["HOST"], port=int(os.environ["PORT"]))
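
# Example client call, as a sketch only: llama_cpp.server exposes
# OpenAI-compatible routes, so once the app is running a completion can be
# requested from /v1/completions. The prompt and max_tokens values below are
# illustrative, not part of this app.
#
#   import os
#   import requests
#
#   resp = requests.post(
#       f"http://{os.environ['HOST']}:{os.environ['PORT']}/v1/completions",
#       json={"prompt": "Q: What is GGML? A:", "max_tokens": 32},
#   )
#   print(resp.json()["choices"][0]["text"])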