import { type ChildProcess, spawn } from "node:child_process";
import path from "node:path";
import { fileURLToPath } from "node:url";
import debug from "debug";
import { downloadFileFromHuggingFaceRepository } from "./downloadFileFromHuggingFaceRepository";

const fileName = path.basename(import.meta.url);
const printMessage = debug(fileName);
debug.enable(fileName);

const SERVER_HOST = "127.0.0.1";
const SERVER_PORT = 8012;
const VERBOSE_MODE = false;
const MODEL_HF_REPO = "Felladrin/gguf-jina-reranker-v1-tiny-en";
const MODEL_HF_FILE = "jina-reranker-v1-tiny-en-Q8_0.gguf";

/** Health endpoint polled during start-up and queried by `getRerankerStatus`. */
const HEALTH_URL = `http://${SERVER_HOST}:${SERVER_PORT}/health`;
/** Delay between two consecutive health probes while waiting for start-up. */
const HEALTH_POLL_INTERVAL_MS = 100;

/**
 * Set once the health endpoint has reported "ok" during start-up; cleared
 * again when the service is stopped or its process exits.
 */
let isReady = false;

/**
 * Returns the absolute path where the reranker model file is stored:
 * `<directory of this module>/models/<MODEL_HF_REPO>/<MODEL_HF_FILE>`.
 */
export function getRerankerModelPath() {
  return path.resolve(
    path.dirname(fileURLToPath(import.meta.url)),
    "models",
    MODEL_HF_REPO,
    MODEL_HF_FILE,
  );
}

/**
 * Delegates to `downloadFileFromHuggingFaceRepository` so that the model file
 * is available at `modelPath`.
 *
 * @param modelPath - Destination path of the model file.
 */
async function ensureModelExists(modelPath: string) {
  await downloadFileFromHuggingFaceRepository(
    MODEL_HF_REPO,
    MODEL_HF_FILE,
    modelPath,
  );
}

/**
 * Probes the health endpoint once.
 *
 * @returns `true` only when the endpoint answers with a JSON body whose
 * `status` is `"ok"`; any network or parsing failure yields `false`.
 */
async function isServerHealthy(): Promise<boolean> {
  try {
    const response = await fetch(HEALTH_URL);
    const responseJson = (await response.json()) as { status?: string };
    return responseJson.status === "ok";
  } catch {
    return false;
  }
}

/**
 * Polls the health endpoint until it reports "ok", then marks the service as
 * ready.
 *
 * @param serverProcess - The spawned server process being waited on.
 * @throws Rejects if the process fails to spawn or exits before the health
 * endpoint reports "ok", instead of polling forever.
 */
function waitUntilReady(serverProcess: ChildProcess): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    let settled = false;
    let pollTimer: ReturnType<typeof setTimeout> | undefined;

    // Stops polling and detaches the start-up listeners; returns false when
    // the promise was already settled so that callers become no-ops.
    const settleOnce = (): boolean => {
      if (settled) {
        return false;
      }
      settled = true;
      clearTimeout(pollTimer);
      serverProcess.off("error", onError);
      serverProcess.off("exit", onExit);
      return true;
    };

    const onError = (error: Error) => {
      if (settleOnce()) {
        reject(
          new Error(`Failed to start reranker service: ${error.message}`),
        );
      }
    };

    const onExit = (code: number | null, signal: string | null) => {
      if (settleOnce()) {
        reject(
          new Error(
            `Reranker service exited before becoming ready (code: ${code}, signal: ${signal})`,
          ),
        );
      }
    };

    const poll = async () => {
      const healthy = await isServerHealthy();

      // The process may have failed while the probe was in flight.
      if (settled) {
        return;
      }

      if (healthy) {
        if (settleOnce()) {
          isReady = true;
          resolve();
        }
      } else {
        pollTimer = setTimeout(() => void poll(), HEALTH_POLL_INTERVAL_MS);
      }
    };

    serverProcess.once("error", onError);
    serverProcess.once("exit", onExit);
    void poll();
  });
}

/**
 * Makes sure the model file is present, spawns `llama-server` in reranking
 * mode on `SERVER_HOST:SERVER_PORT` and waits until its health endpoint
 * reports "ok".
 *
 * @returns The spawned server process, to be handed to `stopRerankerService`.
 * @throws If the process cannot be spawned or exits before becoming ready.
 */
export async function startRerankerService() {
  printMessage("Preparing model...");
  const modelPath = getRerankerModelPath();
  await ensureModelExists(modelPath);

  printMessage("Starting service...");
  const contextSize = 2048;
  const serverProcess = spawn(
    "llama-server",
    [
      "--model",
      modelPath,
      "--ctx-size",
      contextSize.toString(),
      "--batch-size",
      contextSize.toString(),
      "--ubatch-size",
      contextSize.toString(),
      "--flash-attn",
      "--host",
      SERVER_HOST,
      "--port",
      SERVER_PORT.toString(),
      "--log-verbosity",
      VERBOSE_MODE ? "1" : "0",
      "--threads",
      "1",
      "--parallel",
      "1",
      "--no-warmup",
      "--reranking",
      "--pooling",
      "rank",
    ],
    {
      stdio: [
        "ignore",
        VERBOSE_MODE ? "pipe" : "ignore",
        VERBOSE_MODE ? "pipe" : "ignore",
      ],
    },
  );

  // Without an 'error' listener, a spawn failure (e.g. the binary is not
  // installed) is thrown as an uncaught exception by the event emitter.
  serverProcess.on("error", (error) => {
    printMessage(`Service process error: ${error.message}`);
  });

  // A dead process can no longer serve requests.
  serverProcess.once("exit", () => {
    isReady = false;
  });

  // Both pipes are drained: a piped stream that is never read can stall the
  // child once the pipe buffer fills up.
  const forwardOutput = (data: { toString(): string }) => {
    printMessage(data.toString());
  };
  serverProcess.stdout?.on("data", forwardOutput);
  serverProcess.stderr?.on("data", forwardOutput);

  await waitUntilReady(serverProcess);

  printMessage("Service ready!");
  return serverProcess;
}

/**
 * Terminates the given server process, if any, and marks the service as not
 * ready so that `rerank` and `getRerankerStatus` stop relying on it.
 *
 * @param serverProcess - Process returned by `startRerankerService`, or
 * `null` when nothing was started (in which case this is a no-op).
 */
export function stopRerankerService(serverProcess: ChildProcess | null) {
  if (serverProcess) {
    isReady = false;
    serverProcess.kill();
  }
}

/**
 * Reports whether the reranker service is currently usable.
 *
 * @returns `false` if the service never became ready (or was stopped);
 * otherwise the result of a fresh probe of the health endpoint.
 */
export async function getRerankerStatus() {
  if (!isReady) {
    return false;
  }

  return isServerHealthy();
}

/**
 * Sends `query` and `documents` to the service's `/v1/rerank` endpoint,
 * requesting as many results as there are documents.
 *
 * @param query - Text the documents are ranked against.
 * @param documents - Candidate documents to rank.
 * @returns The `results` property of the JSON response, passed through
 * without validation.
 * @throws If the service is not ready or the HTTP response is not OK.
 */
export async function rerank(query: string, documents: string[]) {
  if (!isReady) {
    throw new Error("Reranker service is not ready");
  }

  const response = await fetch(
    `http://${SERVER_HOST}:${SERVER_PORT}/v1/rerank`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: "rerank",
        query,
        documents,
        top_n: documents.length,
      }),
    },
  );

  if (!response.ok) {
    throw new Error(`Reranking failed: ${response.statusText}`);
  }

  const result = await response.json();
  return result.results;
}