first commit

Files changed (4) hide show

LID-40-3-2000000-1-4.bin ADDED Viewed

+version https://git-lfs.github.com/spec/v1
+oid sha256:987a2e16b216eb22f0342beb75874e9748cf6bceeb4ac75f6e2efc3414e74961
+size 32001553

README.md ADDED Viewed

+---
+license: agpl-3.0
+---

config.json ADDED Viewed

+{
+  "model_type": "floret",
+  "vocab_size": 2000000,
+  "embedding_dim": 300,
+  "hash_count": 4,
+  "minn": 3,
+  "maxn": 6,
+  "bucket": 2000000,
+  "num_labels": 40,
+  "id2label": {
+    "0": "English",
+    "1": "German",
+    "2": "French"
+  },
+  "label2id": {
+    "English": 0,
+    "German": 1,
+    "French": 2
+  }
+}

impresso_langident_wrapper.py ADDED Viewed

+import floret  # Assuming Floret is already installed
+class FloretLangIdentifier:
+    def __init__(self, model_path):
+        self.model = floret.load_model(model_path)
+    def predict(self, text):
+        predictions = self.model.predict(text)
+        return predictions
+from transformers import Pipeline
+class MyPipeline(Pipeline):
+    def _sanitize_parameters(self, **kwargs):
+        preprocess_kwargs = {}
+        if "maybe_arg" in kwargs:
+            preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
+        return preprocess_kwargs, {}, {}
+    def preprocess(self, inputs, maybe_arg=2):
+        return inputs
+    def _forward(self, model_inputs):
+        # model_inputs == {"model_input": model_input}
+        outputs = self.model.predict_language(**model_inputs)
+        # Maybe {"logits": Tensor(...)}
+        return outputs
+    def postprocess(self, model_outputs):
+        return model_outputs