csukuangfj committed on
Commit
5681610
·
1 Parent(s): 114a81a

add speaker diarization

Browse files
Files changed (1) hide show
  1. generate-speaker-diarization.py +168 -0
generate-speaker-diarization.py ADDED
@@ -0,0 +1,168 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import os
3
+ import re
4
+ from pathlib import Path
5
+ from typing import List
6
+
7
+ BASE_URL = "https://huggingface.co/csukuangfj/sherpa-onnx-apk/resolve/main/"
8
+
9
+ from dataclasses import dataclass
10
+
11
+
12
@dataclass
class APK:
    """Version and architecture parsed from an APK file name.

    The file name (any leading directories are ignored) is expected to look
    like::

        sherpa-onnx-{major}.{minor}.{patch}-{arch}-speaker-diarization-....apk

    ``arch`` is stored as a sort key, not as the literal ABI name: ``arm64-*``
    is prefixed with ``"z"`` and ``armeabi-*`` with ``"y"``, so that a plain
    string comparison orders them after every ``x86*`` value.

    Raises:
        IndexError, ValueError: if the name does not follow the layout above.
    """

    major: int
    minor: int
    patch: int
    arch: str

    def __init__(self, s):
        # sherpa-onnx-1.10.27-arm64-v8a-speaker-diarization-pyannote_audio-3dspeaker.apk
        name = str(s).split("/")[-1]
        fields = name.split("-")
        self.major, self.minor, self.patch = map(int, fields[2].split("."))
        self.arch = fields[3]

        # ARM ABI names contain a dash themselves (arm64-v8a, armeabi-v7a), so
        # the ABI spans two dash-separated fields. Test the arch field rather
        # than the whole file name: a model name that happens to contain
        # "arm" must not turn "x86" into "x86-speaker".
        if "arm" in self.arch:
            self.arch += "-" + fields[4]

        # Sort-key prefixes: "z..." > "y..." > "x86...".
        if "armeabi" in self.arch:
            self.arch = "y" + self.arch

        if "arm64" in self.arch:
            self.arch = "z" + self.arch
33
+
34
+
35
def sort_by_apk(x):
    """Return the sort key ``(major, minor, patch, arch)`` for APK path ``x``."""
    parsed = APK(x)
    key = (parsed.major, parsed.minor, parsed.patch, parsed.arch)
    return key
38
+
39
+
40
def get_all_files(d_list: List[str], suffix: str) -> List[str]:
    """Collect download URLs of all sufficiently recent APKs under ``d_list``.

    Args:
        d_list: A directory, or a list of directories, to walk recursively.
        suffix: Only files whose name ends with this suffix are considered.

    Returns:
        ``BASE_URL`` + relative path for every matching file whose version is
        at least 1.9.10, ordered by ``sort_by_apk`` in descending order
        (newest version first).

    Raises:
        IndexError, ValueError: if a matching file name does not carry a
            ``major.minor.patch`` version as its third dash-separated field.
    """
    if isinstance(d_list, str):
        d_list = [d_list]

    min_version = (1, 9, 10)

    ss = []
    for d in d_list:
        for root, _, files in os.walk(d):
            for f in files:
                if not f.endswith(suffix):
                    continue
                major, minor, patch = map(int, f.split("-")[2].split("."))
                # Compare as a tuple (lexicographically). Checking each
                # component on its own would reject e.g. 1.10.5 or 2.0.0.
                if (major, minor, patch) >= min_version:
                    ss.append(os.path.join(root, f))

    ans = sorted(ss, key=sort_by_apk, reverse=True)

    return [BASE_URL + str(x) for x in ans]
60
+
61
+
62
def to_file(filename: str, files: List[str]):
    """Write the speaker-diarization APK index page to ``filename``.

    The page consists of a fixed HTML header followed by one download link
    per entry of ``files``. When ``filename`` does not contain ``"-cn"``, a
    note pointing to the sibling ``<name>-cn<ext>`` page is added before the
    links.

    Args:
        filename: Path of the HTML file to create or overwrite.
        files: Download URLs; the link text is the part after the last "/".
    """
    content = r"""
<h1> APKs for Speaker diarization </h1>
This page lists the <strong>speaker diarization</strong> APKs for <a href="http://github.com/k2-fsa/sherpa-onnx">sherpa-onnx</a>,
one of the deployment frameworks of <a href="https://github.com/k2-fsa">the Next-gen Kaldi project</a>.
<br/>
The name of an APK has the following rule:
<ul>
<li> sherpa-onnx-{version}-{arch}-speaker-diarization-{segmentation_model}-{embedding_model}.apk
</ul>
where
<ul>
<li> version: It specifies the current version, e.g., 1.10.27
<li> arch: The architecture targeted by this APK, e.g., arm64-v8a, armeabi-v7a, x86_64, x86
<li> segmentation_model: The framework for the speaker segmentation model
<li> embedding_model: The framework for the speaker embedding extraction model
</ul>

<br/><br/>

<span style="color:red;">Note:</span> Please see
<a href="https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models">https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-segmentation-models</a> for a list of supported speaker segmentation models.
<br/><br/>

<span style="color:red;">Note:</span> Please see
<a href="https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models">https://github.com/k2-fsa/sherpa-onnx/releases/tag/speaker-recongition-models</a> for a list of supported speaker embedding extraction models.
<br/><br/>

<br/>
<br/>

<strong>Note about the license</strong> The code of Next-gen Kaldi is using
<a href="https://www.apache.org/licenses/LICENSE-2.0">Apache-2.0 license</a>. However,
we support models from different frameworks. Please check the license of your selected model.

<br/><br/>

<strong>Note about the build script</strong> You can find the script for building the APKs
at <a href="https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/build-apk-speaker-diarization.sh">https://github.com/k2-fsa/sherpa-onnx/blob/master/scripts/apk/build-apk-speaker-diarization.sh</a>

<br/><br/>

<style type="text/css">
.tg {border-collapse:collapse;border-spacing:0;}
.tg td{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
overflow:hidden;padding:10px 5px;word-break:normal;}
.tg th{border-color:black;border-style:solid;border-width:1px;font-family:Arial, sans-serif;font-size:14px;
font-weight:normal;overflow:hidden;padding:10px 5px;word-break:normal;}
.tg .tg-0pky{border-color:inherit;text-align:left;vertical-align:top}
.tg .tg-0lax{text-align:left;vertical-align:top}
</style>
<table class="tg">
<thead>
<tr>
<th class="tg-0pky">APK</th>
<th class="tg-0lax">Speaker segmentation model</th>
<th class="tg-0pky">Speaker embedding extraction model</th>
</tr>
</thead>
<tbody>
<tr>
<td class="tg-0pky">sherpa-onnx-x.y.z-arm64-v8a-speaker-diarization-pyannote_audio-3dspeaker.apk</td>
<td class="tg-0pky"><a href="https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-segmentation-models/sherpa-onnx-pyannote-segmentation-3-0.tar.bz2">sherpa-onnx-pyannote-segmentation-3-0.tar.bz2</a> It is <a href="https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/pyannote/segmentation">converted</a> from <a href="https://huggingface.co/pyannote/segmentation-3.0">https://huggingface.co/pyannote/segmentation-3.0</a></td>
<td class="tg-0pky"><a href="https://github.com/k2-fsa/sherpa-onnx/releases/download/speaker-recongition-models/3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx">3dspeaker_speech_eres2net_base_sv_zh-cn_3dspeaker_16k.onnx</a> It is <a href="https://github.com/k2-fsa/sherpa-onnx/tree/master/scripts/3dspeaker">converted</a> from <a href="https://github.com/alibaba-damo-academy/3D-Speaker">https://github.com/alibaba-damo-academy/3D-Speaker</a></td>
</tr>
</tbody>
</table>

<br/>
<br/>
<div/>
"""

    if "-cn" not in filename:
        # Link to the mirror page that belongs to THIS page, i.e. the same
        # file name with "-cn" inserted before the extension.
        stem, ext = os.path.splitext(os.path.basename(filename))
        cn_page = f"./{stem}-cn{ext}"
        content += f"""
For Chinese users, please <a href="{cn_page}">visit this address</a>,
which replaces <a href="https://huggingface.co">huggingface.co</a> with <a href="https://hf-mirror.com">hf-mirror.com</a>
<br/>
<br/>
中国用户, 请访问<a href="{cn_page}">这个地址</a>
<br/>
<br/>
"""

    # The page contains non-ASCII text, so do not rely on the locale encoding.
    with open(filename, "w", encoding="utf-8") as f:
        print(content, file=f)
        for x in files:
            name = x.rsplit("/", maxsplit=1)[-1]
            print(f'<a href="{x}">{name}</a><br/>', file=f)
151
+
152
+
153
def main():
    """Generate the speaker-diarization APK page and its mirror variant.

    The second page lists the same APKs with every URL rewritten from
    huggingface.co to hf-mirror.com and "resolve" replaced by "blob".
    """
    urls = get_all_files("speaker-diarization", suffix=".apk")
    to_file("./apk-speaker-diarization.html", urls)

    # for Chinese users
    mirror_urls = [
        u.replace("huggingface.co", "hf-mirror.com").replace("resolve", "blob")
        for u in urls
    ]
    to_file("./apk-speaker-diarization-cn.html", mirror_urls)


if __name__ == "__main__":
    main()