import os
from typing import List

import pandas as pd

from .utils import process_kernels, process_quantizations

DATASET_DIRECTORY = "dataset"

COLUMNS_MAPPING = {
    "config.name": "Experiment 🧪",
    "config.backend.model": "Model 🤗",
    # benchmark measurements
    "report.prefill.latency.p50": "Prefill (s)",
    "report.per_token.latency.p50": "Per Token (s)",
    "report.decode.throughput.value": "Decode (tokens/s)",
    "report.decode.efficiency.value": "Energy (tokens/kWh)",
    "report.decode.memory.max_allocated": "Memory (MB)",
    # backend and optimization settings
    "config.backend.name": "Backend 🏭",
    "config.backend.torch_dtype": "Precision 📥",
    "quantization": "Quantization 🗜️",
    "attention": "Attention 👁️",
    "kernel": "Kernel ⚛️",
    # additional model information
    "architecture": "Architecture 🏛️",
    "prefill+decode": "End-to-End (s)",
    "Average ⬆️": "Open LLM Score (%)",
    "#Params (B)": "Params (B)",
}
SORTING_COLUMNS = ["Open LLM Score (%)", "Decode (tokens/s)", "Prefill (s)"]
SORTING_ASCENDING = [False, True, False]

def get_raw_llm_perf_df(
    machine: str, subsets: List[str], backends: List[str], hardware_type: str
):
    # fetch every available perf CSV for the requested backend/subset combinations
    dfs = []
    for subset in subsets:
        for backend in backends:
            try:
                dfs.append(
                    pd.read_csv(
                        f"hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
                    )
                )
            except Exception:
                print("Dataset not found for:")
                print(f"  • Backend: {backend}")
                print(f"  • Subset: {subset}")
                print(f"  • Machine: {machine}")
                print(f"  • Hardware Type: {hardware_type}")
                url = f"https://huggingface.co/datasets/optimum-benchmark/llm-perf-leaderboard/blob/main/perf-df-{backend}-{hardware_type}-{subset}-{machine}.csv"
                print(f"  • URL: {url}")

    if len(dfs) == 0:
        raise ValueError(
            f"No datasets found for machine {machine}, check your hardware.yml config file or your dataset on the Hugging Face Hub"
        )

    perf_df = pd.concat(dfs)
    llm_df = pd.read_csv(
        "hf://datasets/optimum-benchmark/llm-perf-leaderboard/llm-df.csv"
    )

    # join benchmark results with per-model metadata (Open LLM score, params, etc.)
    llm_perf_df = pd.merge(
        llm_df, perf_df, left_on="Model", right_on="config.backend.model"
    )

    return llm_perf_df
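
# Sketch of what this function tries to fetch (the argument values below are
# illustrative assumptions, not a guaranteed list of files published on the Hub):
#
#   get_raw_llm_perf_df(
#       machine="1xA10",
#       subsets=["unquantized"],
#       backends=["pytorch"],
#       hardware_type="cuda",
#   )
#   # attempts to read:
#   # hf://datasets/optimum-benchmark/llm-perf-leaderboard/perf-df-pytorch-cuda-unquantized-1xA10.csv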


def processed_llm_perf_df(llm_perf_df):
    # the leaderboard assumes a single benchmark scenario per machine
    assert llm_perf_df["config.scenario.input_shapes.batch_size"].nunique() == 1
    assert llm_perf_df["config.scenario.input_shapes.sequence_length"].nunique() == 1
    assert llm_perf_df["config.scenario.generate_kwargs.max_new_tokens"].nunique() == 1
    assert llm_perf_df["config.scenario.generate_kwargs.min_new_tokens"].nunique() == 1

    # drop rows without decode measurements and shorten experiment names
    llm_perf_df.dropna(subset=["report.decode.latency.p50"], inplace=True)
    llm_perf_df["config.name"] = llm_perf_df["config.name"].str.replace(
        "flash_attention_2", "fa2"
    )
    # end-to-end latency is prefill latency plus decode latency
    llm_perf_df["prefill+decode"] = (
        llm_perf_df["report.prefill.latency.p50"]
        + llm_perf_df["report.decode.latency.p50"]
    )

    # derive human-readable columns
    llm_perf_df["architecture"] = llm_perf_df["Architecture"]
    llm_perf_df["attention"] = (
        llm_perf_df["config.backend.attn_implementation"]
        .str.replace("flash_attention_2", "FAv2")
        .str.replace("eager", "Eager")
        .str.replace("sdpa", "SDPA")
    )
    llm_perf_df["quantization"] = llm_perf_df.apply(process_quantizations, axis=1)
    llm_perf_df["kernel"] = llm_perf_df.apply(process_kernels, axis=1)

    # round numeric columns for display
    llm_perf_df = llm_perf_df.round(
        {
            "report.prefill.latency.p50": 3,
            "report.decode.latency.p50": 3,
            "report.decode.throughput.value": 3,
            "report.decode.efficiency.value": 3,
            "report.decode.memory.max_allocated": 3,
            "Average ⬆️": 3,
            "prefill+decode": 3,
            "#Params (B)": 3,
        }
    )

    # keep only the leaderboard columns, then rename and sort them
    llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
    llm_perf_df.rename(columns=COLUMNS_MAPPING, inplace=True)
    llm_perf_df.sort_values(
        by=SORTING_COLUMNS,
        ascending=SORTING_ASCENDING,
        inplace=True,
    )

    return llm_perf_df
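
# Illustration of the sorting applied above, on a toy frame (values are made up;
# the real frame comes from the merged Hub CSVs):
#
#   toy = pd.DataFrame(
#       {
#           "Open LLM Score (%)": [52.1, 67.3],
#           "Decode (tokens/s)": [40.0, 25.0],
#           "Prefill (s)": [0.12, 0.30],
#       }
#   )
#   toy.sort_values(by=SORTING_COLUMNS, ascending=SORTING_ASCENDING)
#   # -> the row with the higher Open LLM Score comes first (primary key, descending)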


def get_llm_perf_df(
    machine: str, subsets: List[str], backends: List[str], hardware_type: str
):
    if not os.path.exists(DATASET_DIRECTORY):
        os.makedirs(DATASET_DIRECTORY)

    # reuse the locally cached CSV when it exists, otherwise download and process it
    if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
        llm_perf_df = pd.read_csv(
            f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"
        )
    else:
        print(f"Dataset for machine {machine} not found, downloading...")
        llm_perf_df = get_raw_llm_perf_df(machine, subsets, backends, hardware_type)
        llm_perf_df = processed_llm_perf_df(llm_perf_df)
        llm_perf_df.to_csv(
            f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False
        )

    return llm_perf_df
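
# Typical entry point for the app (argument values are illustrative assumptions,
# not the leaderboard's actual configuration):
#
#   df = get_llm_perf_df(
#       machine="1xA100",
#       subsets=["unquantized"],
#       backends=["pytorch"],
#       hardware_type="cuda",
#   )
#   # the first call downloads and processes the raw CSVs, then caches the result
#   # as dataset/llm-perf-leaderboard-1xA100.csv; subsequent calls read the cache.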