Christopher Glaze
committed on
Commit
•
cbc0f63
1
Parent(s):
fbe1af4
Add nltk resource
Browse files
handler.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
|
2 |
from typing import Dict, List, Union, Optional
|
|
|
3 |
from pathlib import Path
|
4 |
import json
|
5 |
import joblib
|
@@ -10,6 +11,9 @@ import torch
|
|
10 |
import numpy as np
|
11 |
from sklearn.base import TransformerMixin
|
12 |
|
|
|
|
|
|
|
13 |
class SimcseGenerator(TransformerMixin):
|
14 |
def __init__(
|
15 |
self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
|
@@ -57,7 +61,7 @@ class EndpointHandler():
|
|
57 |
def __init__(self, path: str = ""):
|
58 |
|
59 |
if len(path)==0:
|
60 |
-
path =
|
61 |
else:
|
62 |
path = Path(path)
|
63 |
|
|
|
1 |
|
2 |
from typing import Dict, List, Union, Optional
|
3 |
+
import os
|
4 |
from pathlib import Path
|
5 |
import json
|
6 |
import joblib
|
|
|
11 |
import numpy as np
|
12 |
from sklearn.base import TransformerMixin
|
13 |
|
14 |
+
LOCAL_PATH = Path(__file__).parent
|
15 |
+
nltk.data.path.append(str(LOCAL_PATH/"nltk_data"))
|
16 |
+
|
17 |
class SimcseGenerator(TransformerMixin):
|
18 |
def __init__(
|
19 |
self, batch_size: int =16, model_name: str = "princeton-nlp/unsup-simcse-bert-base-uncased"
|
|
|
61 |
def __init__(self, path: str = ""):
|
62 |
|
63 |
if len(path)==0:
|
64 |
+
path = LOCAL_PATH
|
65 |
else:
|
66 |
path = Path(path)
|
67 |
|
nltk_data/tokenizers/punkt/PY3/english.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5cad3758596392364e3be9803dbd7ebeda384b68937b488a01365f5551bb942c
|
3 |
+
size 406697
|
nltk_data/tokenizers/punkt/english.pickle
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dda37972ae88998a6fd3e3ec002697a6bd362b32d050fda7d7ca5276873092aa
|
3 |
+
size 433305
|