Spaces:
Sleeping
Sleeping
Add threads
Browse files
app.py
CHANGED
@@ -49,8 +49,8 @@ st.write('Loading the pretrained model ...')
|
|
49 |
model_name = "CarolXia/pii-kd-deberta-v2"
|
50 |
# config = PeftConfig.from_pretrained(model_name)
|
51 |
model = DebertaV2ForTokenClassification.from_pretrained(model_name, token=st.secrets["HUGGINGFACE_TOKEN"])
|
52 |
-
|
53 |
-
|
54 |
# Try quantization instead
|
55 |
# model = AutoModelForTokenClassification.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
56 |
tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/mdeberta-v3-base", token=st.secrets["HUGGINGFACE_TOKEN"])
|
@@ -141,18 +141,27 @@ entity_set=dict()
|
|
141 |
dataset = load_dataset("Isotonic/pii-masking-200k", split="train")
|
142 |
unmasked_text = dataset['unmasked_text'] # This will load the entire column into memory. Must do this to avoid I/O delay later
|
143 |
|
144 |
-
st.write('
|
145 |
-
sizes = [0] *
|
146 |
start = time.time()
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
end = time.time()
|
158 |
length = end - start
|
|
|
49 |
model_name = "CarolXia/pii-kd-deberta-v2"
|
50 |
# config = PeftConfig.from_pretrained(model_name)
|
51 |
model = DebertaV2ForTokenClassification.from_pretrained(model_name, token=st.secrets["HUGGINGFACE_TOKEN"])
|
52 |
+
if torch.cuda.is_available():
|
53 |
+
model = model.to("cuda")
|
54 |
# Try quantization instead
|
55 |
# model = AutoModelForTokenClassification.from_pretrained(model_name, device_map="auto", load_in_8bit=True)
|
56 |
tokenizer = DebertaV2Tokenizer.from_pretrained("microsoft/mdeberta-v3-base", token=st.secrets["HUGGINGFACE_TOKEN"])
|
|
|
141 |
dataset = load_dataset("Isotonic/pii-masking-200k", split="train")
|
142 |
unmasked_text = dataset['unmasked_text'] # This will load the entire column into memory. Must do this to avoid I/O delay later
|
143 |
|
144 |
+
st.write('Number of rows in the dataset ', dataset.num_rows)
|
145 |
+
sizes = [0] * 5
|
146 |
start = time.time()
|
147 |
+
# t0 = threading.Thread(target=process_datasets, args=(0, 50, unmasked_text, sizes, 0, entity_set, []))
|
148 |
+
# t1 = threading.Thread(target=process_datasets, args=(25, 50, unmasked_text, sizes, 1, entity_set, []))
|
149 |
+
# t2 = threading.Thread(target=process_datasets, args=(20, 30, unmasked_text, sizes, 2, entity_set, []))
|
150 |
+
# t3 = threading.Thread(target=process_datasets, args=(30, 40, unmasked_text, sizes, 3, entity_set, []))
|
151 |
+
# t4 = threading.Thread(target=process_datasets, args=(40, 50, unmasked_text, sizes, 4, entity_set, []))
|
152 |
+
# with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], profile_memory=True, record_shapes=True) as prof:
|
153 |
+
process_datasets(0, 50, unmasked_text, sizes, 0, entity_set, [])
|
154 |
+
# t0.start()
|
155 |
+
# t1.start()
|
156 |
+
# t2.start()
|
157 |
+
# t3.start()
|
158 |
+
# t4.start()
|
159 |
+
|
160 |
+
# t0.join()
|
161 |
+
# t1.join()
|
162 |
+
# t2.join()
|
163 |
+
# t3.join()
|
164 |
+
# t4.join()
|
165 |
|
166 |
end = time.time()
|
167 |
length = end - start
|