import { HF_ACCESS_TOKEN } from "$env/static/private";
import { HfInference } from "@huggingface/inference";
import { defaultModel } from "$lib/server/models";
import type { BackendModel } from "../models";
import { generateFromDefaultEndpoint } from "../generateFromDefaultEndpoint";

export async function summarizeWeb(content: string, query: string, model: BackendModel) {
	// If HF_ACCESS_TOKEN is set, use the hosted HF Inference API for summarization.
	try {
		if (HF_ACCESS_TOKEN) {
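			// Run the content through the summarization pipeline
			// (facebook/bart-large-cnn) and return its summary text directly.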
			const summary = (
				await new HfInference(HF_ACCESS_TOKEN).summarization({
					model: "facebook/bart-large-cnn",
					inputs: content,
					parameters: {
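						// Cap the length of the generated summary.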
						max_length: 512,
					},
				})
			).summary_text;
			return summary;
		}
	} catch (e) {
		console.error(e);
	}
	// Otherwise, or if the Inference API call failed, fall back to the default LLM.
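	// Keep at most `truncate` words of the content so the prompt fits the model's
	// context window (note: an unset `truncate` currently yields an empty excerpt).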
	const summaryPrompt = defaultModel.webSearchSummaryPromptRender({
		answer: content
			.split(" ")
			.slice(0, model.parameters?.truncate ?? 0)
			.join(" "),
		query,
	});
	const summary = (await generateFromDefaultEndpoint(summaryPrompt)).trim();
	return summary;
}
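
// Example usage (an illustrative sketch; `scrapedText`, `userQuery`, and
// `currentModel` are assumed caller-side values, not part of this module):
//
//   const summary = await summarizeWeb(scrapedText, userQuery, currentModel);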