Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
•
e829671
1
Parent(s):
bcdca08
update
Browse files- cli/halueval-upload-cli.py +3 -1
- cli/isp-data-json/hindsight-neglect_classification.jsonl.gz +3 -0
- cli/isp-data-json/into-the-unknown_classification.jsonl.gz +3 -0
- cli/isp-data-json/memo-trap_classification.jsonl.gz +3 -0
- cli/isp-data-json/modus-tollens_classification.jsonl.gz +3 -0
- cli/isp-data-json/neqa_classification.jsonl.gz +3 -0
- cli/isp-data-json/pattern-matching-suppression_classification.jsonl.gz +3 -0
- cli/isp-data-json/prompt-injection_sequence-probability.jsonl.gz +3 -0
- cli/isp-data-json/redefine_classification.jsonl.gz +3 -0
- cli/isp-data-json/repetitive-algebra_classification.jsonl.gz +3 -0
- cli/isp-data-json/resisting-correction_classification.jsonl.gz +3 -0
- cli/isp-data-json/sig-figs_classification.jsonl.gz +3 -0
- cli/isp-upload-cli.py +23 -0
cli/halueval-upload-cli.py
CHANGED
@@ -24,6 +24,7 @@ for name in gold_splits:
|
|
24 |
# if name not in available_splits:
|
25 |
ds.push_to_hub(path, config_name=name)
|
26 |
|
|
|
27 |
def list_to_dict(lst: list) -> dict:
|
28 |
res = dict()
|
29 |
for entry in lst:
|
@@ -33,6 +34,7 @@ def list_to_dict(lst: list) -> dict:
|
|
33 |
res[k] += [v]
|
34 |
return res
|
35 |
|
|
|
36 |
for name in (gold_splits - {'general'}):
|
37 |
random.seed(42)
|
38 |
ds = name_to_ds[name]
|
@@ -67,4 +69,4 @@ for name in (gold_splits - {'general'}):
|
|
67 |
new_ds = Dataset.from_dict(new_ds_map)
|
68 |
new_dsd = DatasetDict({'data': new_ds})
|
69 |
|
70 |
-
new_dsd.push_to_hub(path, config_name=f'{name}_samples')
|
|
|
24 |
# if name not in available_splits:
|
25 |
ds.push_to_hub(path, config_name=name)
|
26 |
|
27 |
+
|
28 |
def list_to_dict(lst: list) -> dict:
|
29 |
res = dict()
|
30 |
for entry in lst:
|
|
|
34 |
res[k] += [v]
|
35 |
return res
|
36 |
|
37 |
+
|
38 |
for name in (gold_splits - {'general'}):
|
39 |
random.seed(42)
|
40 |
ds = name_to_ds[name]
|
|
|
69 |
new_ds = Dataset.from_dict(new_ds_map)
|
70 |
new_dsd = DatasetDict({'data': new_ds})
|
71 |
|
72 |
+
new_dsd.push_to_hub(path, config_name=f'{name}_samples')
|
cli/isp-data-json/hindsight-neglect_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa953115751b9125c851000d095273e0b2ad9880d3d83b4f8cafafd670fa1e03
|
3 |
+
size 46760
|
cli/isp-data-json/into-the-unknown_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1cec08142d7bf3cc69d4070449061311c8b057bae2fd6e3845d1bb69812c542f
|
3 |
+
size 231841
|
cli/isp-data-json/memo-trap_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fe709aaa7b942e8b0e6676fabf16c7129e47c305e72cd0ca70bcc8591cc4efec
|
3 |
+
size 27399
|
cli/isp-data-json/modus-tollens_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc9a37ad6922608b1836913f5ce9fab6905fec195570f8e55b4dea983644fab9
|
3 |
+
size 22312
|
cli/isp-data-json/neqa_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7ca6070f75c8b784d36c04c1083269db3cf0dece61f12652214b19f6781cb83a
|
3 |
+
size 14711
|
cli/isp-data-json/pattern-matching-suppression_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e5478c52424a5875f6190532e3cd4f797e2a867d8e073ff7f06b060335c10d6b
|
3 |
+
size 8337
|
cli/isp-data-json/prompt-injection_sequence-probability.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4bac3f2960422a047f5dd8eae31aa2b9274224ed654f7e6ff87e4a1d3a7e645
|
3 |
+
size 151669
|
cli/isp-data-json/redefine_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c59f8248192cd4a31ad3da5e6f0d8dc1a8107953668d95b395eec4aa8dc3e42e
|
3 |
+
size 9803
|
cli/isp-data-json/repetitive-algebra_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a022b3b94c80db3599eeca2434f9f612a110808306003867bb29b745b81d6f55
|
3 |
+
size 96342
|
cli/isp-data-json/resisting-correction_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:573ca2a5cf615f314d893f7e25b4abb2e990da1b62d57d4a39f0965a2e163991
|
3 |
+
size 566516
|
cli/isp-data-json/sig-figs_classification.jsonl.gz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dffaad70da805cf574d1733e2b7444639d125633b7fc44505dd28f9948b516de
|
3 |
+
size 921371
|
cli/isp-upload-cli.py
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/usr/bin/env python3
|
2 |
+
|
3 |
+
import glob
|
4 |
+
import os
|
5 |
+
|
6 |
+
import random
|
7 |
+
import requests
|
8 |
+
|
9 |
+
from datasets import load_dataset, Dataset, DatasetDict
|
10 |
+
|
11 |
+
folder_path = 'isp-data-json/' # Replace with your folder path
|
12 |
+
|
13 |
+
# Search for all JSON Lines (.jsonl) data files in the folder
|
14 |
+
# Collect both plain and gzip-compressed JSON Lines files: the data shipped
# next to this script is stored as *.jsonl.gz, which '*.jsonl' alone misses.
# Sorted so the upload order is deterministic across runs.
json_files = sorted(
    glob.glob(os.path.join(folder_path, '*.jsonl'))
    + glob.glob(os.path.join(folder_path, '*.jsonl.gz'))
)
|
15 |
+
|
16 |
+
path = 'pminervini/inverse-scaling'
|
17 |
+
|
18 |
+
for json_path in json_files:
|
19 |
+
base_name = os.path.basename(json_path)
|
20 |
+
name = base_name.split("_")[0]
|
21 |
+
|
22 |
+
ds = load_dataset("json", data_files={'data': json_path})
|
23 |
+
ds.push_to_hub(path, config_name=name)
|