machineuser committed on
Commit
4e99448
·
1 Parent(s): c25d7cc

Sync widgets demo

Browse files
packages/inference/package.json CHANGED
@@ -40,7 +40,7 @@
40
  "type": "module",
41
  "scripts": {
42
  "build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
43
- "dts": "tsx scripts/generate-dts.ts",
44
  "lint": "eslint --quiet --fix --ext .cjs,.ts .",
45
  "lint:check": "eslint --ext .cjs,.ts .",
46
  "format": "prettier --write .",
@@ -51,8 +51,10 @@
51
  "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
52
  "check": "tsc"
53
  },
 
 
 
54
  "devDependencies": {
55
- "@huggingface/tasks": "workspace:^",
56
  "@types/node": "18.13.0"
57
  },
58
  "resolutions": {}
 
40
  "type": "module",
41
  "scripts": {
42
  "build": "tsup src/index.ts --format cjs,esm --clean && pnpm run dts",
43
+ "dts": "tsx scripts/generate-dts.ts && tsc --noEmit dist/index.d.ts",
44
  "lint": "eslint --quiet --fix --ext .cjs,.ts .",
45
  "lint:check": "eslint --ext .cjs,.ts .",
46
  "format": "prettier --write .",
 
51
  "test:browser": "vitest run --browser.name=chrome --browser.headless --config vitest.config.mts",
52
  "check": "tsc"
53
  },
54
+ "dependencies": {
55
+ "@huggingface/tasks": "workspace:^"
56
+ },
57
  "devDependencies": {
 
58
  "@types/node": "18.13.0"
59
  },
60
  "resolutions": {}
packages/inference/pnpm-lock.yaml CHANGED
@@ -4,10 +4,12 @@ settings:
4
  autoInstallPeers: true
5
  excludeLinksFromLockfile: false
6
 
7
- devDependencies:
8
  '@huggingface/tasks':
9
  specifier: workspace:^
10
  version: link:../tasks
 
 
11
  '@types/node':
12
  specifier: 18.13.0
13
  version: 18.13.0
 
4
  autoInstallPeers: true
5
  excludeLinksFromLockfile: false
6
 
7
+ dependencies:
8
  '@huggingface/tasks':
9
  specifier: workspace:^
10
  version: link:../tasks
11
+
12
+ devDependencies:
13
  '@types/node':
14
  specifier: 18.13.0
15
  version: 18.13.0
packages/inference/scripts/generate-dts.ts CHANGED
@@ -3,6 +3,8 @@
3
  import { readFileSync, writeFileSync, appendFileSync, readdirSync } from "node:fs";
4
  import { TASKS_DATA } from "@huggingface/tasks";
5
 
 
 
6
  const tasks = Object.keys(TASKS_DATA)
7
  .sort()
8
  .filter((task) => task !== "other");
@@ -36,6 +38,16 @@ for (const dir of dirs) {
36
 
37
  const fileContent = readFileSync(`./src/tasks/${dir}/${file}`, "utf-8");
38
 
 
 
 
 
 
 
 
 
 
 
39
  for (const type of extractTypesAndInterfaces(fileContent)) {
40
  appendFileSync("./dist/index.d.ts", type + "\n");
41
  }
@@ -87,6 +99,13 @@ appendFileSync(
87
  "\n}\n"
88
  );
89
 
 
 
 
 
 
 
 
90
  function* extractTypesAndInterfaces(fileContent: string): Iterable<string> {
91
  let index = 0;
92
 
 
3
  import { readFileSync, writeFileSync, appendFileSync, readdirSync } from "node:fs";
4
  import { TASKS_DATA } from "@huggingface/tasks";
5
 
6
+ const taskImports = new Set<string>();
7
+
8
  const tasks = Object.keys(TASKS_DATA)
9
  .sort()
10
  .filter((task) => task !== "other");
 
38
 
39
  const fileContent = readFileSync(`./src/tasks/${dir}/${file}`, "utf-8");
40
 
41
+ // detect imports from @huggingface/tasks
42
+ for (const imports of fileContent.matchAll(/import type {(.*)} from "@huggingface\/tasks";/g)) {
43
+ // Convert A, B, C to ["A", "B", "C"]
44
+ const imported = imports[1].split(",").map((x) => x.trim());
45
+
46
+ for (const imp of imported) {
47
+ taskImports.add(imp);
48
+ }
49
+ }
50
+
51
  for (const type of extractTypesAndInterfaces(fileContent)) {
52
  appendFileSync("./dist/index.d.ts", type + "\n");
53
  }
 
99
  "\n}\n"
100
  );
101
 
102
+ // Prepend import from @huggingface/tasks
103
+ writeFileSync(
104
+ "./dist/index.d.ts",
105
+ `import type { ${[...taskImports].join(", ")} } from "@huggingface/tasks";\n` +
106
+ readFileSync("./dist/index.d.ts", "utf-8")
107
+ );
108
+
109
  function* extractTypesAndInterfaces(fileContent: string): Iterable<string> {
110
  let index = 0;
111
 
packages/inference/src/tasks/nlp/textGeneration.ts CHANGED
@@ -1,209 +1,9 @@
 
1
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
2
  import type { BaseArgs, Options } from "../../types";
3
  import { request } from "../custom/request";
4
 
5
- /**
6
- * Inputs for Text Generation inference
7
- */
8
- export interface TextGenerationInput {
9
- /**
10
- * The text to initialize generation with
11
- */
12
- inputs: string;
13
- /**
14
- * Additional inference parameters
15
- */
16
- parameters?: TextGenerationParameters;
17
- /**
18
- * Whether to stream output tokens
19
- */
20
- stream?: boolean;
21
- [property: string]: unknown;
22
- }
23
-
24
- /**
25
- * Additional inference parameters
26
- *
27
- * Additional inference parameters for Text Generation
28
- */
29
- export interface TextGenerationParameters {
30
- /**
31
- * The number of sampling queries to run. Only the best one (in terms of total logprob) will
32
- * be returned.
33
- */
34
- best_of?: number;
35
- /**
36
- * Whether or not to output decoder input details
37
- */
38
- decoder_input_details?: boolean;
39
- /**
40
- * Whether or not to output details
41
- */
42
- details?: boolean;
43
- /**
44
- * Whether to use logits sampling instead of greedy decoding when generating new tokens.
45
- */
46
- do_sample?: boolean;
47
- /**
48
- * The maximum number of tokens to generate.
49
- */
50
- max_new_tokens?: number;
51
- /**
52
- * The parameter for repetition penalty. A value of 1.0 means no penalty. See [this
53
- * paper](https://hf.co/papers/1909.05858) for more details.
54
- */
55
- repetition_penalty?: number;
56
- /**
57
- * Whether to prepend the prompt to the generated text.
58
- */
59
- return_full_text?: boolean;
60
- /**
61
- * The random sampling seed.
62
- */
63
- seed?: number;
64
- /**
65
- * Stop generating tokens if a member of `stop_sequences` is generated.
66
- */
67
- stop_sequences?: string[];
68
- /**
69
- * The value used to modulate the logits distribution.
70
- */
71
- temperature?: number;
72
- /**
73
- * The number of highest probability vocabulary tokens to keep for top-k-filtering.
74
- */
75
- top_k?: number;
76
- /**
77
- * If set to < 1, only the smallest set of most probable tokens with probabilities that add
78
- * up to `top_p` or higher are kept for generation.
79
- */
80
- top_p?: number;
81
- /**
82
- * Truncate input tokens to the given size.
83
- */
84
- truncate?: number;
85
- /**
86
- * Typical Decoding mass. See [Typical Decoding for Natural Language
87
- * Generation](https://hf.co/papers/2202.00666) for more information
88
- */
89
- typical_p?: number;
90
- /**
91
- * Watermarking with [A Watermark for Large Language Models](https://hf.co/papers/2301.10226)
92
- */
93
- watermark?: boolean;
94
- [property: string]: unknown;
95
- }
96
-
97
- /**
98
- * Outputs for Text Generation inference
99
- */
100
- export interface TextGenerationOutput {
101
- /**
102
- * When enabled, details about the generation
103
- */
104
- details?: TextGenerationOutputDetails;
105
- /**
106
- * The generated text
107
- */
108
- generated_text: string;
109
- [property: string]: unknown;
110
- }
111
-
112
- /**
113
- * When enabled, details about the generation
114
- */
115
- export interface TextGenerationOutputDetails {
116
- /**
117
- * Details about additional sequences when best_of is provided
118
- */
119
- best_of_sequences?: TextGenerationOutputSequenceDetails[];
120
- /**
121
- * The reason why the generation was stopped.
122
- */
123
- finish_reason: TextGenerationFinishReason;
124
- /**
125
- * The number of generated tokens
126
- */
127
- generated_tokens: number;
128
- prefill: TextGenerationPrefillToken[];
129
- /**
130
- * The random seed used for generation
131
- */
132
- seed?: number;
133
- /**
134
- * The generated tokens and associated details
135
- */
136
- tokens: TextGenerationOutputToken[];
137
- /**
138
- * Most likely tokens
139
- */
140
- top_tokens?: Array<TextGenerationOutputToken[]>;
141
- [property: string]: unknown;
142
- }
143
-
144
- export interface TextGenerationOutputSequenceDetails {
145
- finish_reason: TextGenerationFinishReason;
146
- /**
147
- * The generated text
148
- */
149
- generated_text: string;
150
- /**
151
- * The number of generated tokens
152
- */
153
- generated_tokens: number;
154
- prefill: TextGenerationPrefillToken[];
155
- /**
156
- * The random seed used for generation
157
- */
158
- seed?: number;
159
- /**
160
- * The generated tokens and associated details
161
- */
162
- tokens: TextGenerationOutputToken[];
163
- /**
164
- * Most likely tokens
165
- */
166
- top_tokens?: Array<TextGenerationOutputToken[]>;
167
- [property: string]: unknown;
168
- }
169
-
170
- export interface TextGenerationPrefillToken {
171
- id: number;
172
- logprob: number;
173
- /**
174
- * The text associated with that token
175
- */
176
- text: string;
177
- [property: string]: unknown;
178
- }
179
-
180
- /**
181
- * Generated token.
182
- */
183
- export interface TextGenerationOutputToken {
184
- id: number;
185
- logprob?: number;
186
- /**
187
- * Whether or not that token is a special one
188
- */
189
- special: boolean;
190
- /**
191
- * The text associated with that token
192
- */
193
- text: string;
194
- [property: string]: unknown;
195
- }
196
-
197
- /**
198
- * The reason why the generation was stopped.
199
- *
200
- * length: The generated sequence reached the maximum allowed length
201
- *
202
- * eos_token: The model generated an end-of-sentence (EOS) token
203
- *
204
- * stop_sequence: One of the sequence in stop_sequences was generated
205
- */
206
- export type TextGenerationFinishReason = "length" | "eos_token" | "stop_sequence";
207
 
208
  /**
209
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
 
1
+ import type { TextGenerationInput, TextGenerationOutput } from "@huggingface/tasks";
2
  import { InferenceOutputError } from "../../lib/InferenceOutputError";
3
  import type { BaseArgs, Options } from "../../types";
4
  import { request } from "../custom/request";
5
 
6
+ export type { TextGenerationInput, TextGenerationOutput };
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  /**
9
  * Use to continue text from a prompt. This is a very generic task. Recommended model: gpt2 (it’s a simple model, but fun to play with).
packages/inference/src/tasks/nlp/textGenerationStream.ts CHANGED
@@ -1,6 +1,6 @@
 
1
  import type { BaseArgs, Options } from "../../types";
2
  import { streamingRequest } from "../custom/streamingRequest";
3
- import type { TextGenerationInput } from "./textGeneration";
4
 
5
  export interface TextGenerationStreamToken {
6
  /** Token ID from the model tokenizer */
 
1
+ import type { TextGenerationInput } from "@huggingface/tasks";
2
  import type { BaseArgs, Options } from "../../types";
3
  import { streamingRequest } from "../custom/streamingRequest";
 
4
 
5
  export interface TextGenerationStreamToken {
6
  /** Token ID from the model tokenizer */
packages/widgets/src/lib/components/InferenceWidget/widgets/ConversationalWidget/ConversationalWidget.svelte CHANGED
@@ -51,6 +51,8 @@
51
  let inferenceClient: HfInference | undefined = undefined;
52
  let abort: AbortController | undefined = undefined;
53
 
 
 
54
  // Check config and compile template
55
  onMount(() => {
56
  const config = model.config;
@@ -84,8 +86,6 @@
84
  error = `Invalid chat template: "${(e as Error).message}"`;
85
  return;
86
  }
87
-
88
- inferenceClient = new HfInference(apiToken);
89
  });
90
 
91
  async function handleNewMessage(): Promise<void> {
@@ -165,6 +165,7 @@
165
  signal: abort?.signal,
166
  use_cache: useCache || !$isLoggedIn,
167
  wait_for_model: withModelLoading,
 
168
  } satisfies Options;
169
 
170
  tgiSupportedModels = await getTgiSupportedModels(apiUrl);
 
51
  let inferenceClient: HfInference | undefined = undefined;
52
  let abort: AbortController | undefined = undefined;
53
 
54
+ $: inferenceClient = new HfInference(apiToken);
55
+
56
  // Check config and compile template
57
  onMount(() => {
58
  const config = model.config;
 
86
  error = `Invalid chat template: "${(e as Error).message}"`;
87
  return;
88
  }
 
 
89
  });
90
 
91
  async function handleNewMessage(): Promise<void> {
 
165
  signal: abort?.signal,
166
  use_cache: useCache || !$isLoggedIn,
167
  wait_for_model: withModelLoading,
168
+ retry_on_error: false,
169
  } satisfies Options;
170
 
171
  tgiSupportedModels = await getTgiSupportedModels(apiUrl);
packages/widgets/src/routes/+page.svelte CHANGED
@@ -28,25 +28,70 @@
28
  apiToken = token;
29
  }
30
  }
 
 
31
  });
32
 
33
  const models: ModelData[] = [
34
  {
35
- id: "mistralai/Mistral-7B-Instruct-v0.2",
36
  pipeline_tag: "text-generation",
37
  tags: ["conversational"],
38
  inference: InferenceDisplayability.Yes,
39
  config: {
40
- architectures: ["MistralForCausalLM"],
41
- model_type: "mistral",
42
  tokenizer_config: {
43
  chat_template:
44
- "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') != (loop.index0 % 2 == 0) %}{{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}{% endif %}{% if message['role'] == 'user' %}{{ '[INST] ' + message['content'] + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ message['content'] + eos_token}}{% else %}{{ raise_exception('Only user and assistant roles are supported!') }}{% endif %}{% endfor %}",
45
- use_default_system_prompt: false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  bos_token: "<s>",
47
- eos_token: "</s>",
 
 
 
48
  unk_token: "<unk>",
49
- pad_token: null,
50
  },
51
  },
52
  widgetData: [
 
28
  apiToken = token;
29
  }
30
  }
31
+
32
+ isLoggedIn.set(true);
33
  });
34
 
35
  const models: ModelData[] = [
36
  {
37
+ id: "meta-llama/Meta-Llama-3-8B-Instruct",
38
  pipeline_tag: "text-generation",
39
  tags: ["conversational"],
40
  inference: InferenceDisplayability.Yes,
41
  config: {
42
+ architectures: ["LlamaForCausalLM"],
43
+ model_type: "llama",
44
  tokenizer_config: {
45
  chat_template:
46
+ "{% set loop_messages = messages %}{% for message in loop_messages %}{% set content = '<|start_header_id|>' + message['role'] + '<|end_header_id|>\n\n'+ message['content'] | trim + '<|eot_id|>' %}{% if loop.index0 == 0 %}{% set content = bos_token + content %}{% endif %}{{ content }}{% endfor %}{% if add_generation_prompt %}{{ '<|start_header_id|>assistant<|end_header_id|>\n\n' }}{% endif %}",
47
+ bos_token: "<|begin_of_text|>",
48
+ eos_token: "<|end_of_text|>",
49
+ },
50
+ },
51
+ widgetData: [
52
+ { text: "This is a text-only example", example_title: "Text only" },
53
+ {
54
+ messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
55
+ example_title: "Chat messages",
56
+ },
57
+ {
58
+ messages: [{ content: "Please exlain QCD in very few words", role: "user" }],
59
+ output: {
60
+ text: "QCD is the physics of strong force and small particles.",
61
+ },
62
+ example_title: "Chat messages with Output",
63
+ },
64
+ {
65
+ text: "Explain QCD in one short sentence.",
66
+ output: {
67
+ text: "QCD is the physics of strong force and small particles.",
68
+ },
69
+ example_title: "Text only with Output",
70
+ },
71
+ {
72
+ example_title: "Invalid example - unsupported role",
73
+ messages: [
74
+ { role: "system", content: "This will fail because of the chat template" },
75
+ { role: "user", content: "What's your favorite condiment?" },
76
+ ],
77
+ },
78
+ ],
79
+ },
80
+ {
81
+ id: "microsoft/Phi-3-mini-128k-instruct",
82
+ pipeline_tag: "text-generation",
83
+ tags: ["conversational"],
84
+ inference: InferenceDisplayability.Yes,
85
+ config: {
86
+ architectures: ["Phi3ForCausalLM"],
87
+ model_type: "phi3",
88
+ tokenizer_config: {
89
  bos_token: "<s>",
90
+ chat_template:
91
+ "{{ bos_token }}{% for message in messages %}{% if (message['role'] == 'user') %}{{'<|user|>' + '\n' + message['content'] + '<|end|>' + '\n' + '<|assistant|>' + '\n'}}{% elif (message['role'] == 'assistant') %}{{message['content'] + '<|end|>' + '\n'}}{% endif %}{% endfor %}",
92
+ eos_token: "<|endoftext|>",
93
+ pad_token: "<|endoftext|>",
94
  unk_token: "<unk>",
 
95
  },
96
  },
97
  widgetData: [