# constants
OVERALL_INFO = ["Model", "Overall"]

risk_topic_1_columns = [
    "Crimes and Illegal Activities",
    "Cybersecurity",
    "Data Privacy",
    "Ethics and Morality",
    "Physical and Mental Health",
    "Hate Speech",
    "Extremism",
    "Inappropriate Suggestions"
]
risk_topic_1_columns = [item.lower() for item in risk_topic_1_columns]

attack_columns = [
    "Adaptive Attack",
    "Positive Induction",
    "Reverse Induction",
    "Code Injection",
    "Instruction Jailbreak",
    "Goal Hijacking",
    "Instruction Encryption",
    "DeepInception",
    "In-Context Attack",
    "Chain of Utterances",
    "Compositional Instructions"
]
attack_columns = [item.lower() for item in attack_columns]

XLSX_DIR = "./file/results.xlsx"
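
# A minimal, hypothetical sketch (not part of the original module) of how the
# column constants and XLSX_DIR above might be consumed when building the
# leaderboard tables. It assumes pandas is available and that the workbook has
# one sheet per result set whose headers match OVERALL_INFO plus the
# lower-cased risk-topic / attack columns.
def load_results(sheet_name, columns):
    """Load one result sheet and keep only the leaderboard columns."""
    import pandas as pd  # imported lazily so importing the constants stays lightweight

    df = pd.read_excel(XLSX_DIR, sheet_name=sheet_name)
    # "Model" and "Overall" always come first, followed by per-topic or per-attack scores.
    return df[OVERALL_INFO + columns]

# Example usage (the sheet name "base_zh" is an assumption, not a real sheet name):
# base_df = load_results("base_zh", risk_topic_1_columns)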

LEADERBOARD_INTRODUCTION = """# 🏆 S-Eval Leaderboard
    ## πŸ”” Updates
    πŸ“£ [2024/10/25]: We release all 20,000 base risk prompts and 200,000 corresponding attack prompts ([Version-0.1.2](https://github.com/IS2Lab/S-Eval)). We also update [🏆 LeaderBoard v0.1.2](https://huggingface.co/spaces/IS2Lab/S-Eval_v0.1.2) with new evaluation results, including GPT-4 and other models.  
    πŸŽ‰ S-Eval has about **7,000** total views and about **2,000** total downloads across multiple platforms πŸŽ‰.

    πŸ“£ [2024/06/17]: We further release 10,000 base risk prompts and 100,000 corresponding attack prompts ([Version-0.1.1](https://github.com/IS2Lab/S-Eval)). If you require automatic safety evaluations, please feel free to submit a request via [Issues](https://huggingface.co/spaces/IS2Lab/S-Eval/discussions) or contact us by [Email](mailto:[email protected]).

    πŸ“£ [2024/05/31]: We release 20,000 corresponding attack prompts.

    πŸ“£ [2024/05/23]: We publish our [paper](https://arxiv.org/abs/2405.14191) and first release 2,000 base risk prompts. You can download the benchmark from our [project](https://github.com/IS2Lab/S-Eval) or the [HuggingFace Dataset](https://huggingface.co/datasets/IS2Lab/S-Eval).

    ### ❗️ Note
    Due to limited machine resources, please refresh the page if a connection timeout error occurs.

    You can get more detailed information from our [Project](https://github.com/IS2Lab/S-Eval) and [Paper](https://arxiv.org/abs/2405.14191).
"""

SELECT_SET_INTRO = (
    "Select whether Chinese or English results should be shown."
)

TABLE_INTRODUCTION_1 = """In the table below, we summarize the safety scores (%) of different models on the Base Risk Prompt Set."""
TABLE_INTRODUCTION_2 = """In the table below, we summarize the attack success rates (%) of the instruction attacks in the Attack Prompt Set on different models."""


LEADERBORAD_INFO = """
   S-Eval is designed to be a new comprehensive, multi-dimensional and open-ended safety evaluation benchmark. So far, S-Eval has 220,000 evaluation prompts in total (and is still in active expansion), including 20,000 base risk prompts (10,000 in Chinese and 10,000 in English) and 200,000 *corresponding* attack prompts derived from 10 popular adversarial instruction attacks. These test prompts are generated based on a comprehensive and unified risk taxonomy, specifically designed to encompass all crucial dimensions of LLM safety evaluation and meant to accurately reflect the varied safety levels of LLMs across these risk dimensions.
More details on the construction of the test suite, including model-based test generation, selection, and the expert critique LLM, can be found in our [paper](https://arxiv.org/abs/2405.14191).
"""


CITATION_BUTTON_LABEL = "If our work is useful for your own research, you can cite us with the following BibTeX entry:"

CITATION_BUTTON_TEXT = r"""
@article{yuan2024seval,
  title={S-Eval: Automatic and Adaptive Test Generation for Benchmarking Safety Evaluation of Large Language Models},
  author={Xiaohan Yuan and Jinfeng Li and Dongxia Wang and Yuefeng Chen and Xiaofeng Mao and Longtao Huang and Hui Xue and Wenhai Wang and Kui Ren and Jingyi Wang},
  journal={arXiv preprint arXiv:2405.14191},
  year={2024}
}
"""