Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
machineuser
committed on
Commit
·
4e99448
1
Parent(s):
c25d7cc
Sync widgets demo
Browse files
- packages/inference/package.json +4 -2
- packages/inference/pnpm-lock.yaml +3 -1
- packages/inference/scripts/generate-dts.ts +19 -0
- packages/inference/src/tasks/nlp/textGeneration.ts +2 -202
- packages/inference/src/tasks/nlp/textGenerationStream.ts +1 -1
- packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte +3 -2
- packages/widgets/src/routes/+page.svelte +52 -7
packages/inference/package.json
CHANGED
@@ -40,7 +40,7 @@
|
|
40 |
"type": "module",
|
41 |
"scripts": {
|
42 |
"build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
|
43 |
-
"dts": "tsx scripts/generate-dts.ts",
|
44 |
"lint": "eslint --quiet --fix --ext .cjs,.ts .",
|
45 |
"lint:check": "eslint --ext .cjs,.ts .",
|
46 |
"format": "prettier --write .",
|
@@ -51,8 +51,10 @@
|
|
51 |
"test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
|
52 |
"check": "tsc"
|
53 |
},
|
|
|
|
|
|
|
54 |
"devDependencies": {
|
55 |
-
"@huggingface/tasks": "workspace:^",
|
56 |
"@types/node": "18.13.0"
|
57 |
},
|
58 |
"resolutions": {}
|
|
|
40 |
"type": "module",
|
41 |
"scripts": {
|
42 |
"build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
|
43 |
+
"dts": "tsx scripts/generate-dts.ts && tsc --noEmit dist/index.d.ts",
|
44 |
"lint": "eslint --quiet --fix --ext .cjs,.ts .",
|
45 |
"lint:check": "eslint --ext .cjs,.ts .",
|
46 |
"format": "prettier --write .",
|
|
|
51 |
"test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
|
52 |
"check": "tsc"
|
53 |
},
|
54 |
+
"dependencies": {
|
55 |
+
"@huggingface/tasks": "workspace:^"
|
56 |
+
},
|
57 |
"devDependencies": {
|
|
|
58 |
"@types/node": "18.13.0"
|
59 |
},
|
60 |
"resolutions": {}
|
packages/inference/pnpm-lock.yaml
CHANGED
@@ -4,10 +4,12 @@ settings:
|
|
4 |
autoInstallPeers: true
|
5 |
excludeLinksFromLockfile: false
|
6 |
|
7 |
-
|
8 |
'@huggingface/tasks':
|
9 |
specifier: workspace:^
|
10 |
version: link:../tasks
|
|
|
|
|
11 |
'@types/node':
|
12 |
specifier: 18.13.0
|
13 |
version: 18.13.0
|
|
|
4 |
autoInstallPeers: true
|
5 |
excludeLinksFromLockfile: false
|
6 |
|
7 |
+
dependencies:
|
8 |
'@huggingface/tasks':
|
9 |
specifier: workspace:^
|
10 |
version: link:../tasks
|
11 |
+
|
12 |
+
devDependencies:
|
13 |
'@types/node':
|
14 |
specifier: 18.13.0
|
15 |
version: 18.13.0
|
packages/inference/scripts/generate-dts.ts
CHANGED
@@ -3,6 +3,8 @@
|
|
3 |
import { readFileSync, writeFileSync, appendFileSync, readdirSync } from "node:fs";
|
4 |
import { TASKS_DATA } from "@huggingface/tasks";
|
5 |
|
|
|
|
|
6 |
const tasks = Object.keys(TASKS_DATA)
|
7 |
.sort()
|
8 |
.filter((task) => task !== "other");
|
@@ -36,6 +38,16 @@ for (const dir of dirs) {
|
|
36 |
|
37 |
const fileContent = readFileSync(`./src/tasks/${dir}/${file}`, "utf-8");
|
38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
39 |
for (const type of extractTypesAndInterfaces(fileContent)) {
|
40 |
appendFileSync("./dist/index.d.ts", type + "\n");
|
41 |
}
|
@@ -87,6 +99,13 @@ appendFileSync(
|
|
87 |
"\n}\n"
|
88 |
);
|
89 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
90 |
function* extractTypesAndInterfaces(fileContent: string): Iterable<string> {
|
91 |
let index = 0;
|
92 |
|
|
|
3 |
import { readFileSync, writeFileSync, appendFileSync, readdirSync } from "node:fs";
|
4 |
import { TASKS_DATA } from "@huggingface/tasks";
|
5 |
|
6 |
+
const taskImports = new Set<string>();
|
7 |
+
|
8 |
const tasks = Object.keys(TASKS_DATA)
|
9 |
.sort()
|
10 |
.filter((task) => task !== "other");
|
|
|
38 |
|
39 |
const fileContent = readFileSync(`./src/tasks/${dir}/${file}`, "utf-8");
|
40 |
|
41 |
+
// detect imports from @huggingface/tasks
|
42 |
+
for (const imports of fileContent.matchAll(/import type {(.*)} from "@huggingface\/tasks";/g)) {
|
43 |
+
// Convert A, B, C to ["A", "B", "C"]
|
44 |
+
const imported = imports[1].split(",").map((x) => x.trim());
|
45 |
+
|
46 |
+
for (const imp of imported) {
|
47 |
+
taskImports.add(imp);
|
48 |
+
}
|
49 |
+
}
|
50 |
+
|
51 |
for (const type of extractTypesAndInterfaces(fileContent)) {
|
52 |
appendFileSync("./dist/index.d.ts", type + "\n");
|
53 |
}
|
|
|
99 |
"\n}\n"
|
100 |
);
|
101 |
|
102 |
+
// Prepend import from @huggingface/tasks
|
103 |
+
writeFileSync(
|
104 |
+
"./dist/index.d.ts",
|
105 |
+
`import type { ${[...taskImports].join(", ")} } from "@huggingface/tasks";\n` +
|
106 |
+
readFileSync("./dist/index.d.ts", "utf-8")
|
107 |
+
);
|
108 |
+
|
109 |
function* extractTypesAndInterfaces(fileContent: string): Iterable<string> {
|
110 |
let index = 0;
|
111 |
|
packages/inference/src/tasks/nlp/textGeneration.ts
CHANGED
@@ -1,209 +1,9 @@
|
|
|
|
1 |
import { InferenceOutputError } from "../../lib/InferenceOutputError";
|
2 |
import type { BaseArgs, Options } from "../../types";
|
3 |
import { request } from "../custom/request";
|
4 |
|
5 |
-
|
6 |
-
* Inputs for Text Generation inference
|
7 |
-
*/
|
8 |
-
export interface TextGenerationInput {
|
9 |
-
/**
|
10 |
-
* The text to initialize generation with
|
11 |
-
*/
|
12 |
-
inputs: string;
|
13 |
-
/**
|
14 |
-
* Additional inference parameters
|
15 |
-
*/
|
16 |
-
parameters?: TextGenerationParameters;
|
17 |
-
/**
|
18 |
-
* Whether to stream output tokens
|
19 |
-
*/
|
20 |
-
stream?: boolean;
|
21 |
-
[property: string]: unknown;
|
22 |
-
}
|
23 |
-
|
24 |
-
/**
|
25 |
-
* Additional inference parameters
|
26 |
-
*
|
27 |
-
* Additional inference parameters for Text Generation
|
28 |
-
*/
|
29 |
-
export interface TextGenerationParameters {
|
30 |
-
/**
|
31 |
-
* The number of sampling queries to run. Only the best one (in terms of total logprob) will
|
32 |
-
* be returned.
|
33 |
-
*/
|
34 |
-
best_of?: number;
|
35 |
-
/**
|
36 |
-
* Whether or not to output decoder input details
|
37 |
-
*/
|
38 |
-
decoder_input_details?: boolean;
|
39 |
-
/**
|
40 |
-
* Whether or not to output details
|
41 |
-
*/
|
42 |
-
details?: boolean;
|
43 |
-
/**
|
44 |
-
* Whether to use logits sampling instead of greedy decoding when generating new tokens.
|
45 |
-
*/
|
46 |
-
do_sample?: boolean;
|
47 |
-
/**
|
48 |
-
* The maximum number of tokens to generate.
|
49 |
-
*/
|
50 |
-
max_new_tokens?: number;
|
51 |
-
/**
|
52 |
-
* The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
|
53 |
-
* paper](https://hf.co/papers/1909.05858) for more details.
|
54 |
-
*/
|
55 |
-
repetition_penalty?: number;
|
56 |
-
/**
|
57 |
-
* Whether to prepend the prompt to the generated text.
|
58 |
-
*/
|
59 |
-
return_full_text?: boolean;
|
60 |
-
/**
|
61 |
-
* The random sampling seed.
|
62 |
-
*/
|
63 |
-
seed?: number;
|
64 |
-
/**
|
65 |
-
* Stop generating tokens if a member of `stop_sequences` is generated.
|
66 |
-
*/
|
67 |
-
stop_sequences?: string[];
|
68 |
-
/**
|
69 |
-
* The value used to modulate the logits distribution.
|
70 |
-
*/
|
71 |
-
temperature?: number;
|
72 |
-
/**
|
73 |
-
* The number of highest probability vocabulary tokens to keep for top-k-filtering.
|
74 |
-
*/
|
75 |
-
top_k?: number;
|
76 |
-
/**
|
77 |
-
* If set to < 1, only the smallest set of most probable tokens with probabilities that add
|
78 |
-
* up to `top_p` or higher are kept for generation.
|
79 |
-
*/
|
80 |
-
top_p?: number;
|
81 |
-
/**
|
82 |
-
* Truncate input tokens to the given size.
|
83 |
-
*/
|
84 |
-
truncate?: number;
|
85 |
-
/**
|
86 |
-
* Typical Decoding mass. See [Typical Decoding for Natural Language
|
87 |
-
* Generation](https://hf.co/papers/2202.00666) for more information
|
88 |
-
*/
|
89 |
-
typical_p?: number;
|
90 |
-
/**
|
91 |
-
* Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
|
92 |
-
*/
|
93 |
-
watermark?: boolean;
|
94 |
-
[property: string]: unknown;
|
95 |
-
}
|
96 |
-
|
97 |
-
/**
|
98 |
-
* Outputs for Text Generation inference
|
99 |
-
*/
|
100 |
-
export interface TextGenerationOutput {
|
101 |
-
/**
|
102 |
-
* When enabled, details about the generation
|
103 |
-
*/
|
104 |
-
details?: TextGenerationOutputDetails;
|
105 |
-
/**
|
106 |
-
* The generated text
|
107 |
-
*/
|
108 |
-
generated_text: string;
|
109 |
-
[property: string]: unknown;
|
110 |
-
}
|
111 |
-
|
112 |
-
/**
|
113 |
-
* When enabled, details about the generation
|
114 |
-
*/
|
115 |
-
export interface TextGenerationOutputDetails {
|
116 |
-
/**
|
117 |
-
* Details about additional sequences when best_of is provided
|
118 |
-
*/
|
119 |
-
best_of_sequences?: TextGenerationOutputSequenceDetails[];
|
120 |
-
/**
|
121 |
-
* The reason why the generation was stopped.
|
122 |
-
*/
|
123 |
-
finish_reason: TextGenerationFinishReason;
|
124 |
-
/**
|
125 |
-
* The number of generated tokens
|
126 |
-
*/
|
127 |
-
generated_tokens: number;
|
128 |
-
prefill: TextGenerationPrefillToken[];
|
129 |
-
/**
|
130 |
-
* The random seed used for generation
|
131 |
-
*/
|
132 |
-
seed?: number;
|
133 |
-
/**
|
134 |
-
* The generated tokens and associated details
|
135 |
-
*/
|
136 |
-
tokens: TextGenerationOutputToken[];
|
137 |
-
/**
|
138 |
-
* Most likely tokens
|
139 |
-
*/
|
140 |
-
top_tokens?: Array<TextGenerationOutputToken[]>;
|
141 |
-
[property: string]: unknown;
|
142 |
-
}
|
143 |
-
|
144 |
-
export interface TextGenerationOutputSequenceDetails {
|
145 |
-
finish_reason: TextGenerationFinishReason;
|
146 |
-
/**
|
147 |
-
* The generated text
|
148 |
-
*/
|
149 |
-
generated_text: string;
|
150 |
-
/**
|
151 |
-
* The number of generated tokens
|
152 |
-
*/
|
153 |
-
generated_tokens: number;
|
154 |
-
prefill: TextGenerationPrefillToken[];
|
155 |
-
/**
|
156 |
-
* The random seed used for generation
|
157 |
-
*/
|
158 |
-
seed?: number;
|
159 |
-
/**
|
160 |
-
* The generated tokens and associated details
|
161 |
-
*/
|
162 |
-
tokens: TextGenerationOutputToken[];
|
163 |
-
/**
|
164 |
-
* Most likely tokens
|
165 |
-
*/
|
166 |
-
top_tokens?: Array<TextGenerationOutputToken[]>;
|
167 |
-
[property: string]: unknown;
|
168 |
-
}
|
169 |
-
|
170 |
-
export interface TextGenerationPrefillToken {
|
171 |
-
id: number;
|
172 |
-
logprob: number;
|
173 |
-
/**
|
174 |
-
* The text associated with that token
|
175 |
-
*/
|
176 |
-
text: string;
|
177 |
-
[property: string]: unknown;
|
178 |
-
}
|
179 |
-
|
180 |
-
/**
|
181 |
-
* Generated token.
|
182 |
-
*/
|
183 |
-
export interface TextGenerationOutputToken {
|
184 |
-
id: number;
|
185 |
-
logprob?: number;
|
186 |
-
/**
|
187 |
-
* Whether or not that token is a special one
|
188 |
-
*/
|
189 |
-
special: boolean;
|
190 |
-
/**
|
191 |
-
* The text associated with that token
|
192 |
-
*/
|
193 |
-
text: string;
|
194 |
-
[property: string]: unknown;
|
195 |
-
}
|
196 |
-
|
197 |
-
/**
|
198 |
-
* The reason why the generation was stopped.
|
199 |
-
*
|
200 |
-
* length: The generated sequence reached the maximum allowed length
|
201 |
-
*
|
202 |
-
* eos_token: The model generated an end-of-sentence (EOS) token
|
203 |
-
*
|
204 |
-
* stop_sequence: One of the sequence in stop_sequences was generated
|
205 |
-
*/
|
206 |
-
export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
|
207 |
|
208 |
/**
|
209 |
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
|
|
|
1 |
+
import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";
|
2 |
import { InferenceOutputError } from "../../lib/InferenceOutputError";
|
3 |
import type { BaseArgs, Options } from "../../types";
|
4 |
import { request } from "../custom/request";
|
5 |
|
6 |
+
export type { TextGenerationInput, TextGenerationOutput };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
|
8 |
/**
|
9 |
* Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
|
packages/inference/src/tasks/nlp/textGenerationStream.ts
CHANGED
@@ -1,6 +1,6 @@
|
|
|
|
1 |
import type { BaseArgs, Options } from "../../types";
|
2 |
import { streamingRequest } from "../custom/streamingRequest";
|
3 |
-
import type { TextGenerationInput } from "./textGeneration";
|
4 |
|
5 |
export interface TextGenerationStreamToken {
|
6 |
/** Token ID from the model tokenizer */
|
|
|
1 |
+
import type { TextGenerationInput } from "@huggingface/tasks";
|
2 |
import type { BaseArgs, Options } from "../../types";
|
3 |
import { streamingRequest } from "../custom/streamingRequest";
|
|
|
4 |
|
5 |
export interface TextGenerationStreamToken {
|
6 |
/** Token ID from the model tokenizer */
|
packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte
CHANGED
@@ -51,6 +51,8 @@
|
|
51 |
let inferenceClient: HfInference | undefined = undefined;
|
52 |
let abort: AbortController | undefined = undefined;
|
53 |
|
|
|
|
|
54 |
// Check config and compile template
|
55 |
onMount(() => {
|
56 |
const config = model.config;
|
@@ -84,8 +86,6 @@
|
|
84 |
error = `Invalid chat template: "${(e as Error).message}"`;
|
85 |
return;
|
86 |
}
|
87 |
-
|
88 |
-
inferenceClient = new HfInference(apiToken);
|
89 |
});
|
90 |
|
91 |
async function handleNewMessage(): Promise<void> {
|
@@ -165,6 +165,7 @@
|
|
165 |
signal: abort?.signal,
|
166 |
use_cache: useCache || !$isLoggedIn,
|
167 |
wait_for_model: withModelLoading,
|
|
|
168 |
} satisfies Options;
|
169 |
|
170 |
tgiSupportedModels = await getTgiSupportedModels(apiUrl);
|
|
|
51 |
let inferenceClient: HfInference | undefined = undefined;
|
52 |
let abort: AbortController | undefined = undefined;
|
53 |
|
54 |
+
$: inferenceClient = new HfInference(apiToken);
|
55 |
+
|
56 |
// Check config and compile template
|
57 |
onMount(() => {
|
58 |
const config = model.config;
|
|
|
86 |
error = `Invalid chat template: "${(e as Error).message}"`;
|
87 |
return;
|
88 |
}
|
|
|
|
|
89 |
});
|
90 |
|
91 |
async function handleNewMessage(): Promise<void> {
|
|
|
165 |
signal: abort?.signal,
|
166 |
use_cache: useCache || !$isLoggedIn,
|
167 |
wait_for_model: withModelLoading,
|
168 |
+
retry_on_error: false,
|
169 |
} satisfies Options;
|
170 |
|
171 |
tgiSupportedModels = await getTgiSupportedModels(apiUrl);
|
packages/widgets/src/routes/+page.svelte
CHANGED
@@ -28,25 +28,70 @@
|
|
28 |
apiToken = token;
|
29 |
}
|
30 |
}
|
|
|
|
|
31 |
});
|
32 |
|
33 |
const models: ModelData[] = [
|
34 |
{
|
35 |
-
id: "
|
36 |
pipeline_tag: "text-generation",
|
37 |
tags: ["conversational"],
|
38 |
inference: InferenceDisplayability.Yes,
|
39 |
config: {
|
40 |
-
architectures: ["
|
41 |
-
model_type: "
|
42 |
tokenizer_config: {
|
43 |
chat_template:
|
44 |
-
"{
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
bos_token: "<s>",
|
47 |
-
|
|
|
|
|
|
|
48 |
unk_token: "<unk>",
|
49 |
-
pad_token: null,
|
50 |
},
|
51 |
},
|
52 |
widgetData: [
|
|
|
28 |
apiToken = token;
|
29 |
}
|
30 |
}
|
31 |
+
|
32 |
+
isLoggedIn.set(true);
|
33 |
});
|
34 |
|
35 |
const models: ModelData[] = [
|
36 |
{
|
37 |
+
id: "meta-llama/Meta-Llama-3-8B-Instruct",
|
38 |
pipeline_tag: "text-generation",
|
39 |
tags: ["conversational"],
|
40 |
inference: InferenceDisplayability.Yes,
|
41 |
config: {
|
42 |
+
architectures: ["LlamaForCausalLM"],
|
43 |
+
model_type: "llama",
|
44 |
tokenizer_config: {
|
45 |
chat_template:
|
46 |
+
"{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
|
47 |
+
bos_token: "<|begin_of_text|>",
|
48 |
+
eos_token: "<|end_of_text|>",
|
49 |
+
},
|
50 |
+
},
|
51 |
+
widgetData: [
|
52 |
+
{ text: "This is a text-only example", example_title: "Text only" },
|
53 |
+
{
|
54 |
+
messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
|
55 |
+
example_title: "Chat messages",
|
56 |
+
},
|
57 |
+
{
|
58 |
+
messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
|
59 |
+
output: {
|
60 |
+
text: "QCD is the physics of strong force and small particles.",
|
61 |
+
},
|
62 |
+
example_title: "Chat messages with Output",
|
63 |
+
},
|
64 |
+
{
|
65 |
+
text: "Explain QCD in one short sentence.",
|
66 |
+
output: {
|
67 |
+
text: "QCD is the physics of strong force and small particles.",
|
68 |
+
},
|
69 |
+
example_title: "Text only with Output",
|
70 |
+
},
|
71 |
+
{
|
72 |
+
example_title: "Invalid example - unsupported role",
|
73 |
+
messages: [
|
74 |
+
{ role: "system", content: "This will fail because of the chat template" },
|
75 |
+
{ role: "user", content: "What's your favorite condiment?" },
|
76 |
+
],
|
77 |
+
},
|
78 |
+
],
|
79 |
+
},
|
80 |
+
{
|
81 |
+
id: "microsoft/Phi-3-mini-128k-instruct",
|
82 |
+
pipeline_tag: "text-generation",
|
83 |
+
tags: ["conversational"],
|
84 |
+
inference: InferenceDisplayability.Yes,
|
85 |
+
config: {
|
86 |
+
architectures: ["Phi3ForCausalLM"],
|
87 |
+
model_type: "phi3",
|
88 |
+
tokenizer_config: {
|
89 |
bos_token: "<s>",
|
90 |
+
chat_template:
|
91 |
+
"{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
|
92 |
+
eos_token: "<|endoftext|>",
|
93 |
+
pad_token: "<|endoftext|>",
|
94 |
unk_token: "<unk>",
|
|
|
95 |
},
|
96 |
},
|
97 |
widgetData: [
|