lvkaokao committed
Commit 95b7a71 (parent: 82d5305)
update.

Browse files:
- app.py +3 -3
- requirements.txt +1 -0
- src/display/about.py +6 -5
- src/display/utils.py +5 -5
app.py
CHANGED
@@ -1,5 +1,5 @@
-import os
-os.system("pip install gradio==3.28.0 pydantic==1.10.15")
+# import os
+# os.system("pip install gradio==3.28.0 pydantic==1.10.15")
 
 import gradio as gr
 import pandas as pd
@@ -262,7 +262,7 @@ with demo:
 interactive=True,
 elem_id="filter-columns-precision",
 )
-with gr.
+with gr.Group() as config:
 gr.HTML("""<p style='padding-bottom: 0.5rem; '>Quantization config</p>""")
 with gr.Row():
 filter_columns_computeDtype = gr.Dropdown(choices=[i.value.name for i in ComputeDtype], label="Compute Dtype", multiselect=False, value="float16", interactive=True,)
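For context, the second hunk wraps the quantization-config filters in a `gr.Group` so the label and its widgets render as one attached block. Below is a minimal, self-contained sketch of that pattern, not the Space's actual code; the `ComputeDtype` enum here is a hypothetical stand-in for the one defined in src/display/utils.py.

```python
# Minimal sketch of the layout pattern introduced by `with gr.Group() as config:`.
# ComputeDtype below is a stand-in; the real enum lives in src/display/utils.py.
from dataclasses import dataclass
from enum import Enum

import gradio as gr


@dataclass(frozen=True)
class DtypeDetails:
    name: str


class ComputeDtype(Enum):  # assumed shape, for illustration only
    float16 = DtypeDetails("float16")
    bfloat16 = DtypeDetails("bfloat16")


with gr.Blocks() as demo:
    # Grouping keeps the HTML label and the filter widgets visually together.
    with gr.Group() as config:
        gr.HTML("""<p style='padding-bottom: 0.5rem; '>Quantization config</p>""")
        with gr.Row():
            filter_columns_computeDtype = gr.Dropdown(
                choices=[i.value.name for i in ComputeDtype],
                label="Compute Dtype",
                multiselect=False,
                value="float16",
                interactive=True,
            )

demo.launch()
```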
requirements.txt
CHANGED
@@ -16,3 +16,4 @@ tokenizers>=0.15.0
 gradio-space-ci @ git+https://huggingface.co/spaces/Wauplin/[email protected] # CI !!!
 gradio==3.28.0
 GitPython==3.1.40
+pydantic==1.10.15
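Pinning `pydantic==1.10.15` here pairs with the app.py change above: the dependency is now installed from requirements.txt at build time instead of via `os.system("pip install ...")` at import time. As an illustration only (not part of this commit), a startup check could assert the pins rather than install them:

```python
# Hypothetical startup check: verify the requirements.txt pins are what is
# actually installed, instead of shelling out to pip when the app imports.
from importlib.metadata import version

EXPECTED = {"gradio": "3.28.0", "pydantic": "1.10.15"}  # pins from requirements.txt

for package, expected in EXPECTED.items():
    installed = version(package)
    if installed != expected:
        raise RuntimeError(f"{package}=={installed} installed, but {expected} is pinned")
```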
src/display/about.py
CHANGED
@@ -50,20 +50,21 @@ python main.py --model=hf-causal-experimental \
 --num_fewshot=<n_few_shot> \
 --batch_size=1 \
 --output_path=<output_path>
+
 ```
 
 **Note:** You can expect results to vary slightly for different batch sizes because of padding.
 
 The tasks and few shots parameters are:
-- ARC-C: 0-shot, *arc_challenge* (`
-- ARC-E: 0-shot, *arc_easy* (`
-- HellaSwag: 0-shot, *hellaswag* (`
+- ARC-C: 0-shot, *arc_challenge* (`acc`)
+- ARC-E: 0-shot, *arc_easy* (`acc`)
+- HellaSwag: 0-shot, *hellaswag* (`acc`)
 - TruthfulQA: 0-shot, *truthfulqa_mc2* (`acc`)
 - MMLU: 0-shot, *hendrycksTest-abstract_algebra,hendrycksTest-anatomy,hendrycksTest-astronomy,hendrycksTest-business_ethics,hendrycksTest-clinical_knowledge,hendrycksTest-college_biology,hendrycksTest-college_chemistry,hendrycksTest-college_computer_science,hendrycksTest-college_mathematics,hendrycksTest-college_medicine,hendrycksTest-college_physics,hendrycksTest-computer_security,hendrycksTest-conceptual_physics,hendrycksTest-econometrics,hendrycksTest-electrical_engineering,hendrycksTest-elementary_mathematics,hendrycksTest-formal_logic,hendrycksTest-global_facts,hendrycksTest-high_school_biology,hendrycksTest-high_school_chemistry,hendrycksTest-high_school_computer_science,hendrycksTest-high_school_european_history,hendrycksTest-high_school_geography,hendrycksTest-high_school_government_and_politics,hendrycksTest-high_school_macroeconomics,hendrycksTest-high_school_mathematics,hendrycksTest-high_school_microeconomics,hendrycksTest-high_school_physics,hendrycksTest-high_school_psychology,hendrycksTest-high_school_statistics,hendrycksTest-high_school_us_history,hendrycksTest-high_school_world_history,hendrycksTest-human_aging,hendrycksTest-human_sexuality,hendrycksTest-international_law,hendrycksTest-jurisprudence,hendrycksTest-logical_fallacies,hendrycksTest-machine_learning,hendrycksTest-management,hendrycksTest-marketing,hendrycksTest-medical_genetics,hendrycksTest-miscellaneous,hendrycksTest-moral_disputes,hendrycksTest-moral_scenarios,hendrycksTest-nutrition,hendrycksTest-philosophy,hendrycksTest-prehistory,hendrycksTest-professional_accounting,hendrycksTest-professional_law,hendrycksTest-professional_medicine,hendrycksTest-professional_psychology,hendrycksTest-public_relations,hendrycksTest-security_studies,hendrycksTest-sociology,hendrycksTest-us_foreign_policy,hendrycksTest-virology,hendrycksTest-world_religions* (average of all the results `acc`)
 - Winogrande: 0-shot, *winogrande* (`acc`)
 - Lambada_Openai: 0-shot, *lambada_openai* (`acc`)
-- PIQA: 0-shot, *piqa* (`
-- OpenBookQA: 0-shot, *openbookqa* (`
+- PIQA: 0-shot, *piqa* (`acc`)
+- OpenBookQA: 0-shot, *openbookqa* (`acc`)
 - BoolQ: 0-shot, *boolq* (`acc`)
 
 Side note on the baseline scores:
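The MMLU row in the list above is described as the plain average of the `acc` score of every *hendrycksTest-* sub-task. A small illustrative sketch of that aggregation, assuming the usual lm-eval-harness results layout (this helper is hypothetical, not code from this repository):

```python
# Illustration of "average of all the results `acc`" for MMLU: mean of the
# `acc` value of every hendrycksTest-* sub-task in a harness results dict.
# The `results` layout is an assumption based on lm-eval-harness JSON output.
def average_mmlu_acc(results: dict[str, dict[str, float]]) -> float:
    accs = [
        metrics["acc"]
        for task_name, metrics in results.items()
        if task_name.startswith("hendrycksTest-")
    ]
    return sum(accs) / len(accs)


example = {
    "hendrycksTest-anatomy": {"acc": 0.52},
    "hendrycksTest-astronomy": {"acc": 0.61},
    "arc_challenge": {"acc": 0.45},  # ignored: not an MMLU sub-task
}
print(average_mmlu_acc(example))  # ≈ 0.565
```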
src/display/utils.py
CHANGED
@@ -14,14 +14,14 @@ class Task:
 col_name: str
 
 class Tasks(Enum):
-arc = Task("arc:challenge", "
-arc_easy = Task("arc:easy", "
+arc = Task("arc:challenge", "acc,none", "ARC-c")
+arc_easy = Task("arc:easy", "acc,none", "ARC-e")
 boolq = Task("boolq", "acc,none", "Boolq")
-hellaswag = Task("hellaswag", "
+hellaswag = Task("hellaswag", "acc,none", "HellaSwag")
 lambada_openai = Task("lambada:openai", "acc,none", "Lambada_openai")
 mmlu = Task("mmlu", "acc,none", "MMLU")
-openbookqa = Task("openbookqa", "
-piqa = Task("piqa", "
+openbookqa = Task("openbookqa", "acc,none", "Openbookqa")
+piqa = Task("piqa", "acc,none", "Piqa")
 # truthfulqa:mc1 / truthfulqa:mc2 -- ?
 truthfulqa_mc = Task("truthfulqa:mc1", "acc,none", "Truthfulqa_mc1")
 # arc:challenge ?
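Each `Tasks` entry above binds a harness benchmark name and a metric key (e.g. `"acc,none"`) to a leaderboard column name, which is why the completed metric strings matter. The sketch below shows how such an enum is typically consumed when building a leaderboard row; the `Task` field names other than `col_name`, and the results layout, are assumptions for illustration, not code from this diff.

```python
# Hedged sketch: turn raw per-benchmark results into a leaderboard row by
# looking up each Task's metric key and renaming it to its display column.
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # harness task name, e.g. "arc:challenge"
    metric: str     # metric key in the results file, e.g. "acc,none"
    col_name: str   # column shown on the leaderboard


class Tasks(Enum):
    arc = Task("arc:challenge", "acc,none", "ARC-c")
    boolq = Task("boolq", "acc,none", "Boolq")


def to_leaderboard_row(results: dict[str, dict[str, float]]) -> dict[str, float]:
    """Pick each task's metric out of the raw results and key it by column name."""
    return {
        task.value.col_name: results[task.value.benchmark][task.value.metric]
        for task in Tasks
        if task.value.benchmark in results
    }


print(to_leaderboard_row({"arc:challenge": {"acc,none": 0.43}}))
# {'ARC-c': 0.43}
```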