#!/usr/bin/env python3
"""Generate the download pages that list the generate-subtitles APPs.

The script walks the ``generate-subtitles`` directory, keeps the archives
whose version is at least ``min_major.min_minor.min_patch`` (imported from
``version``), and writes two pages: one whose entries are built from
``BASE_URL`` and one whose entries point at hf-mirror.com instead.
"""

import os
import re
from dataclasses import dataclass
from pathlib import Path
from typing import List

from version import min_major, min_minor, min_patch

BASE_URL = "https://huggingface.co/csukuangfj/sherpa-onnx-bin/resolve/main/"


@dataclass
class APP:
    """Fields parsed by position from the file name of an APP archive.

    The base name is split on "-"; the version comes from piece 2, the OS
    from piece 5, the architecture from piece 6, the short model name from
    piece 7 and the language from piece 8.  The language piece is the last
    one, so it still carries the file extension (e.g. ``zh_en.app.tar.bz2``).
    """

    major: int
    minor: int
    patch: int
    os: str
    arch: str
    short_name: str
    # Declared as a field so that the generated __repr__/__eq__ include it.
    lang: str

    def __init__(self, s):
        """Parse ``s``, a path or URL whose last "/"-separated part is the file name.

        Raises:
            IndexError: if the file name has fewer than nine "-"-separated pieces.
            ValueError: if the version piece is not three dot-separated integers.
        """
        # sherpa-onnx-1.10.21-generate-subtitles-macos-arm64-paraformer_small_2024_03_09-zh_en.app.tar.bz2
        s = str(s).split("/")[-1]
        split = s.split("-")

        self.major, self.minor, self.patch = list(map(int, split[2].split(".")))
        self.os = split[5]
        self.arch = split[6]
        self.lang = split[8]
        self.short_name = split[7]


def sort_by_app(x):
    """Return the sort key of file ``x``: version first, then os/arch/model/lang."""
    x = APP(x)
    return (x.major, x.minor, x.patch, x.os, x.arch, x.short_name, x.lang)


def get_all_files(d_list: List[str], suffix: str) -> List[str]:
    """Collect the download URLs of all APP archives found under ``d_list``.

    Args:
        d_list: Directories to walk recursively.  A single directory given
            as a plain string is accepted as well.
        suffix: Only files whose name ends with this suffix are considered.

    Returns:
        ``BASE_URL`` + relative path for every file whose version is at
        least ``(min_major, min_minor, min_patch)``, sorted in descending
        order of ``sort_by_app``.

    Raises:
        IndexError, ValueError: if a matching file name does not follow the
            naming scheme documented in ``APP``.
    """
    if isinstance(d_list, str):
        d_list = [d_list]

    min_version = (min_major, min_minor, min_patch)

    ss = []
    for d in d_list:
        for root, _, files in os.walk(d):
            for f in files:
                if not f.endswith(suffix):
                    continue

                major, minor, patch = map(int, f.split("-")[2].split("."))
                # Versions are ordered lexicographically.  Comparing every
                # component on its own would reject e.g. 2.0.0 when the
                # minimum is 1.10.21.
                if (major, minor, patch) >= min_version:
                    ss.append(os.path.join(root, f))

    ans = sorted(ss, key=sort_by_app, reverse=True)

    return [BASE_URL + str(x) for x in ans]


def to_file(filename: str, files: List[str]):
    """Write the download page ``filename`` listing ``files``.

    Args:
        filename: Output path.  Unless it contains "-cn", a note for Chinese
            users is appended to the page header.
        files: URLs of the APP archives; the part after the last "/" of
            each URL is written to the page.
    """
    content = r"""

APPs for generating subtitles (生成字幕)

This page lists the APPs for generating subtitles using sherpa-onnx, one of the deployment frameworks of the Next-gen Kaldi project.

Note
The name of an APP has the following rule: where

You can download all supported models from https://github.com/k2-fsa/sherpa-onnx/releases/tag/asr-models

If you are curious about how the APPs are built and want to find the source code, please visit https://k2-fsa.github.io/sherpa/onnx/lazarus/generate-subtitles.html

Note about the license The code of Next-gen Kaldi is using Apache-2.0 license. However, we support models from different frameworks. Please check the license of your selected model.

APP Comment ASR Model VAD Model
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-zipformer_reazonspeech_2024_08_01-ja It supports only Japanese (日语). Please refer to the doc for more details. sherpa-onnx-zipformer-ja-reazonspeech-2024-08-01.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-zipformer_gigaspeech2-th It supports only Thai (泰语). Please refer to the doc for more details. sherpa-onnx-zipformer-thai-2024-06-20.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-whisper_tiny.en-en It supports only English (英语). Please see the doc for more details. Whereas we are using tiny.en here, you are free to switch to other kinds of models, e.g., base. sherpa-onnx-whisper-tiny.en silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-moonshine_tiny-en It supports only English (英语). Please see the doc for more details. sherpa-onnx-moonshine-tiny-en-int8 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-sense_voice-zh_en_ko_ja_yue It supports Chinese, English, Korean, Japense, Cantonese (中文、英语、韩语、日语、粤语供 5 种语言). Please refer to the doc for more details. sherpa-onnx-sense-voice-zh-en-ja-ko-yue-2024-07-17.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-telespeech_ctc-zh It supports only Chinese (支持非常多种中文方言). It is converted from https://github.com/Tele-AI/TeleSpeech-ASR. Please refer to the doc for more details. sherpa-onnx-telespeech-ctc-int8-zh-2024-06-04.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-paraformer_2023_09_14--zh_en It supports Chinese and English (中英双语). Please refer to the doc for more details. sherpa-onnx-paraformer-zh-2023-09-14.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-zipformer_wenetspeech-zh It supports only Chinese (中文). Please refer to the doc for more details. icefall-asr-zipformer-wenetspeech-20230615.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-zipformer_gigaspeech-zh It supports only English (英语). Please refer to the doc for more details. sherpa-onnx-zipformer-gigaspeech-2023-12-12.tar.bz2 silero_vad.onnx
sherpa-onnx-x.y.z-generate-subtitles-windows-x64-paraformer_small_2024_03_09-zh_en It supports Chinese and English (中英双语). Please refer to the doc for more details. It uses a small Paraformer model. sherpa-onnx-paraformer-zh-small-2024-03-09.tar.bz2 silero_vad.onnx


"""
    if "-cn" not in filename:
        content += """ For Chinese users, please visit this address, which replaces huggingface.co with hf-mirror.com

中国用户, 请访问这个地址

"""

    # The page contains Chinese text, so the encoding is fixed explicitly
    # instead of depending on the locale's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        print(content, file=f)
        for x in files:
            name = x.rsplit("/", maxsplit=1)[-1]
            # NOTE(review): the source had a raw line break inside this
            # single-quoted f-string, which is not valid Python; it is
            # spelled as an escaped newline here.
            print(f"{name}\n", file=f)


def main():
    """Generate the default download page and the one for Chinese users."""
    app = get_all_files("generate-subtitles", suffix=".tar.bz2")
    to_file("./download-generated-subtitles.html", app)

    # for Chinese users
    app2 = []
    for a in app:
        a = a.replace("huggingface.co", "hf-mirror.com")
        a = a.replace("resolve", "blob")
        app2.append(a)

    to_file("./download-generated-subtitles-cn.html", app2)


if __name__ == "__main__":
    main()