doc-vis-qa

Running

App Files Files Community

Vova Manannikov committed on Apr 24, 2023

Commit

dc92bf4

•

1 Parent(s): 1204aad

Update to doc & vis question answering

Browse files

Files changed (3) hide show

README.md +7 -5
index.html +163 -108
index.mjs +616 -0

README.md CHANGED Viewed

@@ -1,15 +1,17 @@
 ---
-title: Image to text
-emoji: 📚
 colorFrom: pink
 colorTo: indigo
 sdk: static
 pinned: false
 license: mit
-description: Showcase image captioning or text extraction using huggingface.js
 duplicated_from: huggingfacejs/image-to-text
 ---
-Showcase streaming text generation using the `@huggingface/inference` JS lib.
-Default model for inference: https://huggingface.co/nlpconnect/vit-gpt2-image-captioning

 ---
+title: Document and visual question answering
+emoji: ❓
 colorFrom: pink
 colorTo: indigo
 sdk: static
 pinned: false
 license: mit
+description: Showcase document & visual question answering using huggingface.js
 duplicated_from: huggingfacejs/image-to-text
 ---
+Showcase document & visual question answering using the `@huggingface/inference` JS lib.
+Default models for inference:
+ * Documents: https://huggingface.co/impira/layoutlm-document-qa
+ * Images: https://huggingface.co/dandelin/vilt-b32-finetuned-vqa

index.html CHANGED Viewed

@@ -1,120 +1,175 @@
 <!DOCTYPE html>
 <html>
-	<head>
-		<meta charset="UTF-8" />
-		<meta name="viewport" content="width=device-width, initial-scale=1.0" />
-		<script src="https://cdn.tailwindcss.com"></script>
-		<!-- polyfill for firefox + import maps -->
-		<script src="https://unpkg.com/[email protected]/dist/es-module-shims.js"></script>
-		<script type="importmap">
 			{
 				"imports": {
-					"@huggingface/inference": "https://cdn.jsdelivr.net/npm/@huggingface/[email protected]/+esm"
 				}
 			}
-		</script>
-	</head>
-	<body>
-		<form class="w-[90%] mx-auto pt-8" onsubmit="launch(); return false;">
-			<h1 class="text-3xl font-bold">
 				<span
-					class="bg-clip-text text-transparent bg-gradient-to-r from-pink-500 to-violet-500"
-				>
-					Image to text demo with
 					<a href="https://github.com/huggingface/huggingface.js">
 						<kbd>@huggingface/inference</kbd>
 					</a>
 				</span>
-			</h1>
-			<p class="mt-8">
-				First, input your token if you have one! Otherwise, you may encounter
-				rate limiting. You can create a token for free at
-				<a
-					target="_blank"
-					href="https://huggingface.co/settings/tokens"
-					class="underline text-blue-500"
-					>hf.co/settings/tokens</a
-				>
-			</p>
-			<input
-				type="text"
-				id="token"
-				class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
-				placeholder="token (optional)"
-			/>
-			<p class="mt-8">
-				Pick the model you want to run. Check out over 100 models for image to text
-				<a
-					href="https://huggingface.co/tasks/image-to-text"
-					class="underline text-blue-500"
-					target="_blank"
-				>
-					here</a
-				>. The default model is for image captioning, but you can do text extraction, ...
-			</p>
-			<!-- Default model: https://huggingface.co/nlpconnect/vit-gpt2-image-captioning -->
-			<input
-				type="text"
-				id="model"
-				class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
-				value="nlpconnect/vit-gpt2-image-captioning"
-				required
-			/>
-			<p class="mt-8">Finally the input image</p>
-			<input type="file" required accept="image/*"
-				class="rounded border-blue-500 shadow-md px-3 py-2 w-96 mt-6 block"
-				rows="5"
-				id="prompt"
-			/>
-			<button
-				id="submit"
-				class="my-8 bg-green-500 rounded py-3 px-5 text-white shadow-md disabled:bg-slate-300"
-			>
-				Run
-			</button>
-			<p class="text-gray-400 text-sm">Output logs</p>
-			<div id="logs" class="bg-gray-100 rounded p-3 mb-8 text-sm">
-				Output will be here
-			</div>
-			<p>Check out the <a class="underline text-blue-500" href="https://huggingface.co/spaces/huggingfacejs/image-to-text/blob/main/index.html" target="_blank">source code</a></p>
-		</form>
-		<script type="module">
-			import { HfInference } from "@huggingface/inference";
-			let running = false;
-			async function launch() {
-				if (running) {
-					return;
-				}
-				running = true;
-				try {
-					const hf = new HfInference(
-						document.getElementById("token").value.trim() || undefined
-					);
-					const model = document.getElementById("model").value.trim();
-					const prompt = document.getElementById("prompt").files[0];
-					document.getElementById("logs").textContent = "";
-					const {generated_text} = await hf.imageToText({model, data: prompt});
-					document.getElementById("logs").textContent = generated_text;
-				} catch (err) {
-					alert("Error: " + err.message);
-				} finally {
-					running = false;
-				}
-			}
-			window.launch = launch;
-		</script>
-	</body>
 </html>

 <!DOCTYPE html>
 <html>
+<head>
+    <meta charset="UTF-8"/>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
+    <script src="https://cdn.tailwindcss.com"></script>
+    <!-- polyfill for firefox + import maps -->
+    <script src="https://unpkg.com/[email protected]/dist/es-module-shims.js"></script>
+    <script type="importmap">
 			{
 				"imports": {
+					"@huggingface/inference": "./index.mjs"
 				}
 			}
+    </script>
+</head>
+<body>
+<form class="w-[90%] mx-auto pt-8" onsubmit="launch(); return false;">
+    <h1 class="text-3xl font-bold">
 				<span
+                        class="bg-clip-text text-transparent bg-gradient-to-r from-pink-500 to-violet-500"
+                >
+					Document & visual question answering demo with
 					<a href="https://github.com/huggingface/huggingface.js">
 						<kbd>@huggingface/inference</kbd>
 					</a>
 				</span>
+    </h1>
+    <p class="mt-8">
+        First, input your token if you have one! Otherwise, you may encounter
+        rate limiting. You can create a token for free at
+        <a
+                target="_blank"
+                href="https://huggingface.co/settings/tokens"
+                class="underline text-blue-500"
+        >hf.co/settings/tokens</a
+        >
+    </p>
+    <input
+            type="text"
+            id="token"
+            class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
+            placeholder="token (optional)"
+    />
+    <p class="mt-8">
+        Pick the model type and the model you want to run. Check out models for
+        <a
+                href="https://huggingface.co/tasks/document-question-answering"
+                class="underline text-blue-500"
+                target="_blank"
+        >
+            document</a
+        > and
+        <a
+                href="https://huggingface.co/tasks/visual-question-answering"
+                class="underline text-blue-500"
+                target="_blank"
+        >image</a> question answering.
+    </p>
+    <div class="space-x-2 flex text-sm mt-8">
+        <label>
+            <input class="sr-only peer" name="type" type="radio" value="document" onclick="update_model(this.value)" checked />
+            <div class="px-3 py-3 rounded-lg shadow-md flex items-center justify-center text-slate-700 bg-gradient-to-r peer-checked:font-semibold peer-checked:from-pink-500 peer-checked:to-violet-500 peer-checked:text-white">
+                Document
+            </div>
+        </label>
+        <label>
+            <input class="sr-only peer" name="type" type="radio" value="image" onclick="update_model(this.value)" />
+            <div class="px-3 py-3 rounded-lg shadow-md flex items-center justify-center text-slate-700 bg-gradient-to-r peer-checked:font-semibold peer-checked:from-pink-500 peer-checked:to-violet-500 peer-checked:text-white">
+                Image
+            </div>
+        </label>
+    </div>
+    <input
+            id="model"
+            class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
+            value="impira/layoutlm-document-qa"
+            required
+    />
+    <p class="mt-8">The input image</p>
+    <input type="file" required accept="image/*"
+           class="rounded border-blue-500 shadow-md px-3 py-2 w-96 mt-6 block"
+           rows="5"
+           id="image"
+    />
+    <p class="mt-8">The question</p>
+    <input
+            type="text"
+            id="question"
+            class="rounded border-2 border-blue-500 shadow-md px-3 py-2 w-96 mt-6"
+            required
+    />
+    <button
+            id="submit"
+            class="my-8 bg-green-500 rounded py-3 px-5 text-white shadow-md disabled:bg-slate-300"
+    >
+        Run
+    </button>
+    <p class="text-gray-400 text-sm">Output logs</p>
+    <div id="logs" class="bg-gray-100 rounded p-3 mb-8 text-sm">
+        Output will be here
+    </div>
+    <p>Check out the <a class="underline text-blue-500"
+                        href="#"
+                        target="_blank">source code</a></p>
+</form>
+<script type="module">
+    import {HfInference} from "@huggingface/inference";
+    const default_models = {
+        "document": "impira/layoutlm-document-qa",
+        "image": "dandelin/vilt-b32-finetuned-vqa",
+    };
+    let running = false;
+    async function launch() { // Form submit handler: runs one question-answering request and shows the result in #logs.
+        if (running) { // Ignore re-submits while a request is still in flight.
+            return;
+        }
+        running = true;
+        try {
+            const hf = new HfInference(
+                document.getElementById("token").value.trim() || undefined // An empty token field means "no token".
+            );
+            const model = document.getElementById("model").value.trim();
+            const model_type = document.querySelector("[name=type]:checked").value;
+            const image = document.getElementById("image").files[0];
+            const question = document.getElementById("question").value.trim();
+            document.getElementById("logs").textContent = ""; // Clear the previous output before the new request.
+            const method = model_type === "document" ? hf.documentQuestionAnswering : hf.visualQuestionAnswering; // Pick the task matching the selected radio button.
+            const {answer, score} = await method({model, inputs: {
+                image, question
+                }});
+            document.getElementById("logs").textContent = answer + ": " + score;
+        } catch (err) {
+            alert("Error: " + err.message);
+        } finally {
+            running = false; // Always release the guard, even when the request failed.
+        }
+    }
+    window.launch = launch;
+    window.update_model = (model_type) => {
+        const model_input = document.getElementById("model");
+        const cur_model = model_input.value.trim();
+        let new_model = "";
+        if (
+            model_type === "document" && cur_model === default_models["image"]
+            || model_type === "image" && cur_model === default_models["document"]
+            || cur_model === ""
+        ) {
+             new_model = default_models[model_type];
+        }
+        model_input.value = new_model;
+    };
+</script>
+</body>
 </html>

index.mjs ADDED Viewed

	@@ -0,0 +1,616 @@

+var __defProp = Object.defineProperty;
+var __export = (target, all) => {
+  for (var name in all)
+    __defProp(target, name, { get: all[name], enumerable: true });
+};
+// src/tasks/index.ts
+var tasks_exports = {};
+__export(tasks_exports, {
+  audioClassification: () => audioClassification,
+  automaticSpeechRecognition: () => automaticSpeechRecognition,
+  conversational: () => conversational,
+  documentQuestionAnswering: () => documentQuestionAnswering,
+  featureExtraction: () => featureExtraction,
+  fillMask: () => fillMask,
+  imageClassification: () => imageClassification,
+  imageSegmentation: () => imageSegmentation,
+  imageToText: () => imageToText,
+  objectDetection: () => objectDetection,
+  questionAnswering: () => questionAnswering,
+  request: () => request,
+  sentenceSimilarity: () => sentenceSimilarity,
+  streamingRequest: () => streamingRequest,
+  summarization: () => summarization,
+  tableQuestionAnswering: () => tableQuestionAnswering,
+  textClassification: () => textClassification,
+  textGeneration: () => textGeneration,
+  textGenerationStream: () => textGenerationStream,
+  textToImage: () => textToImage,
+  tokenClassification: () => tokenClassification,
+  translation: () => translation,
+  visualQuestionAnswering: () => visualQuestionAnswering,
+  zeroShotClassification: () => zeroShotClassification
+});
+// src/lib/makeRequestOptions.ts
+var HF_INFERENCE_API_BASE_URL = "https://api-inference.huggingface.co/models/";
+function makeRequestOptions(args, options) {
+  const { model, accessToken, ...otherArgs } = args;
+  const headers = {};
+  if (accessToken) {
+    headers["Authorization"] = `Bearer ${accessToken}`;
+  }
+  const binary = "data" in args && !!args.data;
+  if (!binary) {
+    headers["Content-Type"] = "application/json";
+  } else {
+    if (options?.wait_for_model) {
+      headers["X-Wait-For-Model"] = "true";
+    }
+    if (options?.use_cache === false) {
+      headers["X-Use-Cache"] = "false";
+    }
+    if (options?.dont_load_model) {
+      headers["X-Load-Model"] = "0";
+    }
+  }
+  const url = /^http(s?):/.test(model) || model.startsWith("/") ? model : `${HF_INFERENCE_API_BASE_URL}${model}`;
+  const info = {
+    headers,
+    method: "POST",
+    body: binary ? args.data : JSON.stringify({
+      ...otherArgs,
+      options
+    }),
+    credentials: options?.includeCredentials ? "include" : "same-origin"
+  };
+  return { url, info };
+}
+/* src/tasks/custom/request.ts
+ *
+ * Sends one inference call via `fetch` and returns the decoded response:
+ * parsed JSON when the response Content-Type starts with "application/json",
+ * otherwise the body as a Blob.
+ *
+ * A 503 response is retried once with `wait_for_model: true`, unless
+ * `options.retry_on_error` is `false` or `wait_for_model` was already set.
+ * Throws an Error for any other non-OK response, using the `error` field of a
+ * JSON error body as the message when one is present.
+ */
+async function request(args, options) {
+  const { url, info } = makeRequestOptions(args, options);
+  const response = await fetch(url, info);
+  if (options?.retry_on_error !== false && response.status === 503 && !options?.wait_for_model) {
+    return request(args, { // the retry sets wait_for_model, so this recursion happens at most once
+      ...options,
+      wait_for_model: true
+    });
+  }
+  if (!response.ok) {
+    if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+      const output = await response.json();
+      if (output.error) {
+        throw new Error(output.error);
+      }
+    }
+    throw new Error("An error occurred while fetching the blob"); // generic fallback when no JSON `error` field is available
+  }
+  if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+    return await response.json();
+  }
+  return await response.blob();
+}
+/* src/vendor/fetch-event-source/parse.ts
+ *
+ * getLines(onLine) returns an `onChunk(arr)` function that splits a stream of
+ * byte chunks into lines. Lines end at LF (10), CR (13) or CR followed by LF.
+ * For every complete line it calls `onLine(line, fieldLength)`, where `line`
+ * is a subarray without the line terminator and `fieldLength` is the offset
+ * of the first colon (58) within the line, or -1 when the line has no colon.
+ * An unterminated tail is kept in `buffer` and completed by later chunks.
+ * Chunks are presumably Uint8Array (they are used with `subarray` and
+ * `concat`) - confirm against the caller.
+ */
+function getLines(onLine) {
+  let buffer;
+  let position;
+  let fieldLength;
+  let discardTrailingNewline = false;
+  return function onChunk(arr) {
+    if (buffer === void 0) {
+      buffer = arr;
+      position = 0;
+      fieldLength = -1;
+    } else {
+      buffer = concat(buffer, arr); // append to the unfinished line left over from earlier chunks
+    }
+    const bufLength = buffer.length;
+    let lineStart = 0;
+    while (position < bufLength) {
+      if (discardTrailingNewline) { // the previous line ended with CR: swallow an LF that directly follows it
+        if (buffer[position] === 10 /* NewLine */) {
+          lineStart = ++position;
+        }
+        discardTrailingNewline = false;
+      }
+      let lineEnd = -1;
+      for (; position < bufLength && lineEnd === -1; ++position) {
+        switch (buffer[position]) {
+          case 58 /* Colon */:
+            if (fieldLength === -1) { // only the first colon of a line is recorded
+              fieldLength = position - lineStart;
+            }
+            break;
+          case 13 /* CarriageReturn */:
+            discardTrailingNewline = true; // intentional fall-through: CR also ends the line
+          case 10 /* NewLine */:
+            lineEnd = position;
+            break;
+        }
+      }
+      if (lineEnd === -1) { // ran out of data before a line terminator: wait for the next chunk
+        break;
+      }
+      onLine(buffer.subarray(lineStart, lineEnd), fieldLength);
+      lineStart = position;
+      fieldLength = -1;
+    }
+    if (lineStart === bufLength) {
+      buffer = void 0; // everything was consumed
+    } else if (lineStart !== 0) {
+      buffer = buffer.subarray(lineStart); // drop the consumed lines, keep the partial one
+      position -= lineStart;
+    }
+  };
+}
+function getMessages(onId, onRetry, onMessage) { // Returns an onLine(line, fieldLength) callback that collects fields into a message and emits it via onMessage on each empty line.
+  let message = newMessage();
+  const decoder = new TextDecoder();
+  return function onLine(line, fieldLength) {
+    if (line.length === 0) { // empty line: the current message is complete
+      onMessage?.(message);
+      message = newMessage();
+    } else if (fieldLength > 0) { // lines with no colon (-1) or a leading colon (0) are ignored
+      const field = decoder.decode(line.subarray(0, fieldLength));
+      const valueOffset = fieldLength + (line[fieldLength + 1] === 32 /* Space */ ? 2 : 1); // skip the colon and one optional space after it
+      const value = decoder.decode(line.subarray(valueOffset));
+      switch (field) {
+        case "data":
+          message.data = message.data ? message.data + "\n" + value : value; // repeated data fields are joined with "\n"
+          break;
+        case "event":
+          message.event = value;
+          break;
+        case "id":
+          onId(message.id = value);
+          break;
+        case "retry":
+          const retry = parseInt(value, 10);
+          if (!isNaN(retry)) { // non-numeric retry values are dropped
+            onRetry(message.retry = retry);
+          }
+          break;
+      }
+    }
+  };
+}
+function concat(a, b) {
+  const res = new Uint8Array(a.length + b.length);
+  res.set(a);
+  res.set(b, a.length);
+  return res;
+}
+function newMessage() {
+  return {
+    data: "",
+    event: "",
+    id: "",
+    retry: void 0
+  };
+}
+// src/tasks/custom/streamingRequest.ts
+async function* streamingRequest(args, options) {
+  const { url, info } = makeRequestOptions({ ...args, stream: true }, options);
+  const response = await fetch(url, info);
+  if (options?.retry_on_error !== false && response.status === 503 && !options?.wait_for_model) {
+    return streamingRequest(args, {
+      ...options,
+      wait_for_model: true
+    });
+  }
+  if (!response.ok) {
+    if (response.headers.get("Content-Type")?.startsWith("application/json")) {
+      const output = await response.json();
+      if (output.error) {
+        throw new Error(output.error);
+      }
+    }
+    throw new Error(`Server response contains error: ${response.status}`);
+  }
+  if (response.headers.get("content-type") !== "text/event-stream") {
+    throw new Error(
+      `Server does not support event stream content type, it returned ` + response.headers.get("content-type")
+    );
+  }
+  if (!response.body) {
+    return;
+  }
+  const reader = response.body.getReader();
+  let events = [];
+  const onEvent = (event) => {
+    events.push(event);
+  };
+  const onChunk = getLines(
+    getMessages(
+      () => {
+      },
+      () => {
+      },
+      onEvent
+    )
+  );
+  try {
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done)
+        return;
+      onChunk(value);
+      for (const event of events) {
+        if (event.data.length > 0) {
+          yield JSON.parse(event.data);
+        }
+      }
+      events = [];
+    }
+  } finally {
+    reader.releaseLock();
+  }
+}
+// src/lib/InferenceOutputError.ts
+var InferenceOutputError = class extends TypeError {
+  constructor(message) {
+    super(
+      `Invalid inference output: ${message}. Use the 'request' method with the same parameters to do a custom call with no type checking.`
+    );
+    this.name = "InferenceOutputError";
+  }
+};
+// src/tasks/audio/audioClassification.ts
+async function audioClassification(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{label: string, score: number}>");
+  }
+  return res;
+}
+// src/tasks/audio/automaticSpeechRecognition.ts
+async function automaticSpeechRecognition(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = typeof res?.text === "string";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected {text: string}");
+  }
+  return res;
+}
+// src/tasks/cv/imageClassification.ts
+async function imageClassification(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.score === "number");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{label: string, score: number}>");
+  }
+  return res;
+}
+// src/tasks/cv/imageSegmentation.ts
+async function imageSegmentation(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x.label === "string" && typeof x.mask === "string" && typeof x.score === "number");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{label: string, mask: string, score: number}>");
+  }
+  return res;
+}
+// src/tasks/cv/imageToText.ts
+async function imageToText(args, options) {
+  const res = (await request(args, options))?.[0];
+  if (typeof res?.generated_text !== "string") {
+    throw new InferenceOutputError("Expected {generated_text: string}");
+  }
+  return res;
+}
+// src/tasks/cv/objectDetection.ts
+async function objectDetection(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every(
+    (x) => typeof x.label === "string" && typeof x.score === "number" && typeof x.box.xmin === "number" && typeof x.box.ymin === "number" && typeof x.box.xmax === "number" && typeof x.box.ymax === "number"
+  );
+  if (!isValidOutput) {
+    throw new InferenceOutputError(
+      "Expected Array<{label:string; score:number; box:{xmin:number; ymin:number; xmax:number; ymax:number}}>"
+    );
+  }
+  return res;
+}
+// src/tasks/cv/textToImage.ts
+async function textToImage(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = res && res instanceof Blob;
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Blob");
+  }
+  return res;
+}
+// src/tasks/nlp/conversational.ts
+async function conversational(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res.conversation.generated_responses) && res.conversation.generated_responses.every((x) => typeof x === "string") && Array.isArray(res.conversation.past_user_inputs) && res.conversation.past_user_inputs.every((x) => typeof x === "string") && typeof res.generated_text === "string" && Array.isArray(res.warnings) && res.warnings.every((x) => typeof x === "string");
+  if (!isValidOutput) {
+    throw new InferenceOutputError(
+      "Expected {conversation: {generated_responses: string[], past_user_inputs: string[]}, generated_text: string, warnings: string[]}"
+    );
+  }
+  return res;
+}
+// src/tasks/nlp/featureExtraction.ts
+async function featureExtraction(args, options) {
+  const res = await request(args, options);
+  let isValidOutput = true;
+  if (Array.isArray(res)) {
+    for (const e of res) {
+      if (Array.isArray(e)) {
+        isValidOutput = e.every((x) => typeof x === "number");
+        if (!isValidOutput) {
+          break;
+        }
+      } else if (typeof e !== "number") {
+        isValidOutput = false;
+        break;
+      }
+    }
+  } else {
+    isValidOutput = false;
+  }
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<number[] | number>");
+  }
+  return res;
+}
+// src/tasks/nlp/fillMask.ts
+async function fillMask(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every(
+    (x) => typeof x.score === "number" && typeof x.sequence === "string" && typeof x.token === "number" && typeof x.token_str === "string"
+  );
+  if (!isValidOutput) {
+    throw new InferenceOutputError(
+      "Expected Array<{score: number, sequence: string, token: number, token_str: string}>"
+    );
+  }
+  return res;
+}
+// src/tasks/nlp/questionAnswering.ts
+async function questionAnswering(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = typeof res?.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected {answer: string, end: number, score: number, start: number}");
+  }
+  return res;
+}
+// src/tasks/nlp/sentenceSimilarity.ts
+async function sentenceSimilarity(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x === "number");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected number[]");
+  }
+  return res;
+}
+// src/tasks/nlp/summarization.ts
+async function summarization(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.summary_text === "string");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{summary_text: string}>");
+  }
+  return res?.[0];
+}
+// src/tasks/nlp/tableQuestionAnswering.ts
+async function tableQuestionAnswering(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = typeof res?.aggregator === "string" && typeof res.answer === "string" && Array.isArray(res.cells) && res.cells.every((x) => typeof x === "string") && Array.isArray(res.coordinates) && res.coordinates.every((coord) => Array.isArray(coord) && coord.every((x) => typeof x === "number"));
+  if (!isValidOutput) {
+    throw new InferenceOutputError(
+      "Expected {aggregator: string, answer: string, cells: string[], coordinates: number[][]}"
+    );
+  }
+  return res;
+}
+// src/tasks/nlp/textClassification.ts
+async function textClassification(args, options) {
+  const res = (await request(args, options))?.[0];
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.label === "string" && typeof x.score === "number");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{label: string, score: number}>");
+  }
+  return res;
+}
+// src/tasks/nlp/textGeneration.ts
+async function textGeneration(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.generated_text === "string");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{generated_text: string}>");
+  }
+  return res?.[0];
+}
+// src/tasks/nlp/textGenerationStream.ts
+async function* textGenerationStream(args, options) {
+  yield* streamingRequest(args, options);
+}
+// src/utils/toArray.ts
+function toArray(obj) {
+  if (Array.isArray(obj)) {
+    return obj;
+  }
+  return [obj];
+}
+// src/tasks/nlp/tokenClassification.ts
+async function tokenClassification(args, options) {
+  const res = toArray(await request(args, options));
+  const isValidOutput = Array.isArray(res) && res.every(
+    (x) => typeof x.end === "number" && typeof x.entity_group === "string" && typeof x.score === "number" && typeof x.start === "number" && typeof x.word === "string"
+  );
+  if (!isValidOutput) {
+    throw new InferenceOutputError(
+      "Expected Array<{end: number, entity_group: string, score: number, start: number, word: string}>"
+    );
+  }
+  return res;
+}
+// src/tasks/nlp/translation.ts
+async function translation(args, options) {
+  const res = await request(args, options);
+  const isValidOutput = Array.isArray(res) && res.every((x) => typeof x?.translation_text === "string");
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected type Array<{translation_text: string}>");
+  }
+  return res?.[0];
+}
+// src/tasks/nlp/zeroShotClassification.ts
+async function zeroShotClassification(args, options) {
+  const res = toArray(
+    await request(args, options)
+  );
+  const isValidOutput = Array.isArray(res) && res.every(
+    (x) => Array.isArray(x.labels) && x.labels.every((_label) => typeof _label === "string") && Array.isArray(x.scores) && x.scores.every((_score) => typeof _score === "number") && typeof x.sequence === "string"
+  );
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{labels: string[], scores: number[], sequence: string}>");
+  }
+  return res;
+}
+/* ../shared/src/base64FromBytes.ts
+ *
+ * Encodes a byte array as a base64 string.
+ * Uses `globalThis.Buffer` when it exists and otherwise falls back to
+ * `globalThis.btoa` on a string built from one character per byte.
+ * `arr` is presumably a Uint8Array (it is iterated with `forEach` and each
+ * element is passed to `String.fromCharCode`) - confirm against callers.
+ */
+function base64FromBytes(arr) {
+  if (globalThis.Buffer) {
+    return globalThis.Buffer.from(arr).toString("base64");
+  } else {
+    const bin = [];
+    arr.forEach((byte) => {
+      bin.push(String.fromCharCode(byte));
+    });
+    return globalThis.btoa(bin.join(""));
+  }
+}
+/* src/tasks/multimodal/documentQuestionAnswering.ts
+ *
+ * Asks a question about a document image.
+ * `args.inputs.image` must provide `arrayBuffer()` (e.g. a Blob); it is sent
+ * base64-encoded together with `args.inputs.question`. The first element of
+ * the response is validated and returned.
+ * Throws InferenceOutputError when that element is not
+ * `{answer: string, end: number, score: number, start: number}`.
+ */
+async function documentQuestionAnswering(args, options) {
+  const reqArgs = {
+    ...args,
+    inputs: {
+      question: args.inputs.question,
+      // convert Blob to base64
+      image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
+    }
+  };
+  const res = (await request(reqArgs, options))?.[0]; // only the first element of the response is used
+  const isValidOutput = typeof res?.answer === "string" && typeof res.end === "number" && typeof res.score === "number" && typeof res.start === "number";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{answer: string, end: number, score: number, start: number}>");
+  }
+  return res;
+}
+/* src/tasks/multimodal/visualQuestionAnswering.ts
+ *
+ * Asks a question about an image.
+ * `args.inputs.image` must provide `arrayBuffer()` (e.g. a Blob); it is sent
+ * base64-encoded together with `args.inputs.question`. The first element of
+ * the response is validated and returned.
+ * Throws InferenceOutputError when that element is not
+ * `{answer: string, score: number}`.
+ */
+async function visualQuestionAnswering(args, options) {
+  const reqArgs = {
+    ...args,
+    inputs: {
+      question: args.inputs.question,
+      // convert Blob to base64
+      image: base64FromBytes(new Uint8Array(await args.inputs.image.arrayBuffer()))
+    }
+  };
+  const res = (await request(reqArgs, options))?.[0]; // only the first element of the response is used
+  const isValidOutput = typeof res?.answer === "string" && typeof res.score === "number";
+  if (!isValidOutput) {
+    throw new InferenceOutputError("Expected Array<{answer: string, score: number}>");
+  }
+  return res;
+}
+/* src/HfInference.ts
+ *
+ * Client that exposes every function in `tasks_exports` as a method bound to
+ * one access token and one set of default options.
+ * The methods close over the constructor arguments, so reassigning
+ * `this.accessToken` or `this.defaultOptions` later does not affect them.
+ */
+var HfInference = class {
+  accessToken;
+  defaultOptions;
+  constructor(accessToken = "", defaultOptions = {}) {
+    this.accessToken = accessToken;
+    this.defaultOptions = defaultOptions;
+    for (const [name, fn] of Object.entries(tasks_exports)) {
+      Object.defineProperty(this, name, { // one non-enumerable own property per task function
+        enumerable: false,
+        value: (params, options) => (
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          fn({ ...params, accessToken }, { ...defaultOptions, ...options }) // per-call options override the defaults
+        )
+      });
+    }
+  }
+  /**
+   * Returns copy of HfInference tied to a specified endpoint.
+   */
+  endpoint(endpointUrl) {
+    return new HfInferenceEndpoint(endpointUrl, this.accessToken, this.defaultOptions);
+  }
+};
+var HfInferenceEndpoint = class {
+  constructor(endpointUrl, accessToken = "", defaultOptions = {}) {
+    accessToken;
+    defaultOptions;
+    for (const [name, fn] of Object.entries(tasks_exports)) {
+      Object.defineProperty(this, name, {
+        enumerable: false,
+        value: (params, options) => (
+          // eslint-disable-next-line @typescript-eslint/no-explicit-any
+          fn({ ...params, accessToken, model: endpointUrl }, { ...defaultOptions, ...options })
+        )
+      });
+    }
+  }
+};
+export {
+  HfInference,
+  HfInferenceEndpoint,
+  audioClassification,
+  automaticSpeechRecognition,
+  conversational,
+  documentQuestionAnswering,
+  featureExtraction,
+  fillMask,
+  imageClassification,
+  imageSegmentation,
+  imageToText,
+  objectDetection,
+  questionAnswering,
+  request,
+  sentenceSimilarity,
+  streamingRequest,
+  summarization,
+  tableQuestionAnswering,
+  textClassification,
+  textGeneration,
+  textGenerationStream,
+  textToImage,
+  tokenClassification,
+  translation,
+  visualQuestionAnswering,
+  zeroShotClassification
+};