Gleb Vinarskis committed on
Commit
dc2b383
·
1 Parent(s): 6b54f4a

first commit

Browse files
LID-40-3-2000000-1-4.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987a2e16b216eb22f0342beb75874e9748cf6bceeb4ac75f6e2efc3414e74961
3
+ size 32001553
README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ ---
2
+ license: agpl-3.0
3
+ ---
config.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model_type": "floret",
3
+ "vocab_size": 2000000,
4
+ "embedding_dim": 300,
5
+ "hash_count": 4,
6
+ "minn": 3,
7
+ "maxn": 6,
8
+ "bucket": 2000000,
9
+ "num_labels": 40,
10
+ "id2label": {
11
+ "0": "English",
12
+ "1": "German",
13
+ "2": "French"
14
+ },
15
+ "label2id": {
16
+ "English": 0,
17
+ "German": 1,
18
+ "French": 2
19
+ }
20
+ }
impresso_langident_wrapper.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import floret # Assuming Floret is already installed
2
+
3
+
4
class FloretLangIdentifier:
    """Thin wrapper around a floret model loaded from disk."""

    def __init__(self, model_path):
        """Load the floret model stored at ``model_path``.

        Args:
            model_path: Path passed unchanged to ``floret.load_model``.
        """
        self.model = floret.load_model(model_path)

    def predict(self, text):
        """Return the underlying model's prediction for ``text``.

        Args:
            text: A single string, or a list of strings.

        Returns:
            Whatever ``self.model.predict`` returns for the normalized input.
        """
        # The fastText-derived binding handles one line at a time and rejects
        # strings containing "\n" with ValueError, so multi-line documents
        # are flattened onto a single line before prediction.
        if isinstance(text, str):
            text = text.replace("\n", " ")
        elif isinstance(text, list):
            text = [entry.replace("\n", " ") for entry in text]
        return self.model.predict(text)
11
+
12
+
13
+
14
+
15
+
16
+
17
+ from transformers import Pipeline
18
+
19
+
20
class MyPipeline(Pipeline):
    """Pipeline that hands its inputs to the wrapped model without tokenization."""

    def _sanitize_parameters(self, **kwargs):
        """Sort call-time keyword arguments into per-stage dictionaries.

        Returns:
            A 3-tuple of dicts. Only ``maybe_arg`` is recognized and it is
            placed in the first dict; the other two are always empty.
        """
        preprocess_kwargs = {}
        if "maybe_arg" in kwargs:
            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
        return preprocess_kwargs, {}, {}

    def preprocess(self, inputs, maybe_arg=2):
        """Return ``inputs`` unchanged; ``maybe_arg`` is accepted but not used."""
        return inputs

    def _forward(self, model_inputs):
        """Run the wrapped model on the preprocessed inputs.

        Args:
            model_inputs: Either a mapping of keyword arguments for the model,
                or a raw value (e.g. the string returned by ``preprocess``).

        Returns:
            The model's output, unmodified.
        """
        # Prefer ``predict_language`` when the model provides it; otherwise
        # fall back to ``predict``, which is what FloretLangIdentifier exposes.
        predict = getattr(self.model, "predict_language", None)
        if predict is None:
            predict = self.model.predict

        # ``preprocess`` passes raw inputs through, so only unpack with ``**``
        # when the value really is mapping-like; a plain string would raise
        # TypeError if unpacked.
        if hasattr(model_inputs, "keys"):
            return predict(**model_inputs)
        return predict(model_inputs)

    def postprocess(self, model_outputs):
        """Return ``model_outputs`` unchanged."""
        return model_outputs