"""Task definitions and display text for the MEDICS NER Leaderboard."""

from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    """Describe one benchmark entry of the leaderboard."""

    benchmark: str  # task key in the results json file
    metric: str  # metric key in the results json file
    col_name: str  # name to display in the leaderboard
  

# Select your tasks here
# ---------------------------------------------------
class Tasks(Enum):
    """Benchmarks listed on the leaderboard, one member per dataset.

    Every active member uses "f1" as its metric key. Member names jump from
    ``task1`` to ``task3``; there is no ``task2``.
    """

    # Argument order: task_key in the json file, metric_key in the json file,
    # name to display in the leaderboard
    # Commented-out example entries:
    # task0 = Task("anli_r1", "acc", "ANLI")
    # task1 = Task("logiqa", "acc_norm", "LogiQA")
    task0 = Task("ncbi", "f1", "NCBI")
    task1 = Task("bc5cdr", "f1", "BC5CD")
    task3 = Task("chia", "f1", "CHIA")
    task4 = Task("biored", "f1", "BIORED")
    # Empty placeholders for further entries (currently disabled):
    # task5 = Task("", "f1", "")
    # task6 = Task("", "f1", "")

@dataclass
class M2Type:
    """Describe one entry of ``M2Types``; has the same three fields as ``Task``."""

    benchmark: str  # key in the results json file
    metric: str  # metric key in the results json file
    col_name: str  # name to display in the leaderboard

class M2Types(Enum):
    """Six entries keyed by entity-type name, each using "f1" as its metric key.

    NOTE(review): presumably these are the "6 entity types" mentioned in
    LLM_BENCHMARKS_TEXT - confirm against the code that reads the results.
    """

    # Argument order: key in the json file, metric_key in the json file,
    # name to display in the leaderboard
    type0 = M2Type("condition", "f1", "CONDITION")
    type1 = M2Type("measurement", "f1", "MEASUREMENT")
    type2 = M2Type("drug", "f1", "DRUG")
    type3 = M2Type("procedure", "f1", "PROCEDURE")
    type4 = M2Type("gene", "f1", "GENE")
    # This key contains a space, unlike the others.
    type5 = M2Type("gene variant", "f1", "GENE VARIANT")


# Few-shot setting; change this to match your evaluation setup.
NUM_FEWSHOT = 0
# ---------------------------------------------------


# Leaderboard title and logo, both given as raw HTML snippets.
TITLE = '<h1 align="center" id="space-title">MEDICS NER Leaderboard</h1>'
LOGO = '<img src="file/assets/image.png" alt="M2 X HF" width="500" height="333">'
# What does your leaderboard evaluate?
# Introductory text for the leaderboard; it points readers to the "About"
# section for dataset and metric details.
INTRODUCTION_TEXT = """
Named Entity Recognition of clinical entities is crucial for advancing natural language processing (NLP) applications in healthcare as it is foundational for tasks such as information extraction, clinical decision support, and automated documentation. 
The datasets used for this evaluation encompass a wide range of medical entities, including diseases, symptoms, medications, procedures and anatomical terms. These datasets are sourced from openly available clinical data (including annotations) to ensure comprehensive coverage and reflect the complexity of real-world medical language. More details about the datasets included can be found in the "About" section.
The evaluation metrics used in this leaderboard focus primarily on the F1-score, a widely recognized measure of a model's accuracy. More details about the evaluation metric can be found in the "About" section.
"""

# Which evaluations are you running? How can people reproduce what you have?
# Markdown text: disclaimer, project description, dataset list, metric notes
# and reproducibility section. It is a plain string because it contains no
# placeholders to interpolate.
LLM_BENCHMARKS_TEXT = """

Note: It is important to note that the purpose of this evaluation is purely academic and exploratory. The models assessed here have not been approved for clinical use, and their results should not be interpreted as clinically validated. The leaderboard serves as a platform for researchers to compare models, understand their strengths and limitations, and drive further advancements in the field of clinical NLP.

## About
The Named Clinical Entity Recognition Leaderboard is aimed at advancing the field of natural language processing in healthcare. It provides a standardized platform for evaluating and comparing the performance of various language models in recognizing named clinical entities, a critical task for applications such as clinical documentation, decision support, and information extraction. By fostering transparency and facilitating benchmarking, the leaderboard's goal is to drive innovation and improvement in NLP models. It also helps researchers identify the strengths and weaknesses of different approaches, ultimately contributing to the development of more accurate and reliable tools for clinical use. Despite its exploratory nature, the leaderboard aims to play a role in guiding research and ensuring that advancements are grounded in rigorous and comprehensive evaluations. 

## How it works

### Datasets
📈 We evaluate the models on 4 datasets, encompassing 6 entity types
- NCBI 
- CHIA
- BIORED
- BC5CD

### Evaluation Metrics
We perceive NER objects as spans (with character offsets) instead of token level artifacts. This enables us to expand to nested NER scenarios easily.


## Reproducibility
To reproduce our results, here are the commands you can run:

"""

# Submission instructions (Markdown); links to the submission guide in the
# medics_ner repository, pinned to a specific commit.
EVALUATION_QUEUE_TEXT = """
Follow the steps detailed in the [medics_ner](https://github.com/WadoodAbdul/medics_ner/blob/3b415e9c4c9561ce5168374813072bde36658ff4/docs/submit_to_leaderboard.md) repo to upload your model to the leaderboard.
"""

# Label for the citation snippet (presumably shown next to a copy button - confirm in the UI code).
CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
# Citation snippet itself. It is a raw string and currently holds only a
# newline, i.e. no citation text has been filled in yet.
CITATION_BUTTON_TEXT = r"""
"""