ko-translation-leaderbaord / create_table.py
davidkim205's picture
Upload folder using huggingface_hub
577164e verified
import os
import pandas as pd
from collections import defaultdict
from evaluate import is_duplicated, is_length_exceed, get_average
from utils.decorate import cloud_model, decorate_model_name
from utils.file_handler import load_json
# results_bleu/μ—μ„œ 집계
def aggregate_bleu(json_data, bleu_table, src_table, length_table):
duplicate_count = defaultdict(int)
length_exceeds_count = defaultdict(int)
for data in json_data:
src_table[data["model"]]["Average"].append(data["bleu"])
src_table[data["model"]][data["src"]].append(data["bleu"])
if is_duplicated(data["generation"]):
duplicate_count[data["model"]] += 1
if is_length_exceed(data["reference"], data["generation"]):
length_exceeds_count[data["model"]] += 1
for model, row in src_table.items():
src_table[model] = dict((attr, get_average(val)) for attr, val in row.items())
# bleu, duplicate, length exceeds μΆ”κ°€
for model in src_table:
bleu_table[model]["Average"].append(src_table[model]["Average"])
bleu_table[model]["Bleu"] = src_table[model]["Average"]
bleu_table[model]["Duplicate"] = duplicate_count[model]
bleu_table[model]["Length Exceeds"] = length_exceeds_count[model]
# results_self/μ—μ„œ 집계
def aggregate_self(json_data, bleu_table, src_table, length_table):
sbleu_score = defaultdict(list)
for data in json_data:
sbleu_score[data["model"]].append(data["bleu"])
# sbleu μΆ”κ°€
for model in sbleu_score:
bleu_table[model]["SBleu"] = get_average(sbleu_score[model])
bleu_table[model]["Average"].append(bleu_table[model]["SBleu"])
# results_length/μ—μ„œ 집계
def aggregate_length(json_data, bleu_table, src_table, length_table):
for data in json_data:
length_table[data["model"]]["Average"].append(data["bleu"])
length_table[data["model"]][f"~{data['length']}"].append(data["bleu"])
for model, row in length_table.items():
length_table[model] = dict(
(attr, get_average(val)) for attr, val in row.items()
)
# bleu-sl μΆ”κ°€
for model in length_table:
bleu_table[model]["Bleu-SL"] = length_table[model]["Average"]
bleu_table[model]["Average"].append(bleu_table[model]["Bleu-SL"])
bleu_table[model]["Average"] = get_average(bleu_table[model]["Average"])
def create():
results_dirs = {
"results_bleu/": aggregate_bleu,
"results_self/": aggregate_self,
"results_length/": aggregate_length,
}
bleu_table = defaultdict(lambda: defaultdict(list))
src_table = defaultdict(lambda: defaultdict(list))
length_table = defaultdict(lambda: defaultdict(list))
tables = {
"bleu_and_sbleu": bleu_table,
"bleu_by_src": src_table,
"bleu_by_length": length_table,
}
# bleu score 집계
for dir, aggregate in results_dirs.items():
json_data = []
for filename in os.listdir(dir):
if not filename.endswith(".jsonl"):
continue
file_path = os.path.join(dir, filename)
json_data += load_json(file_path)
aggregate(json_data, *tables.values())
# table dataframe 생성
for table in tables:
df = pd.DataFrame.from_dict(tables[table], orient="index")
if table == "bleu_and_sbleu":
df["Duplicate"] = df.pop("Duplicate")
df["Length Exceeds"] = df.pop("Length Exceeds")
df.reset_index(inplace=True, names="Model")
df["Model"] = list(map(decorate_model_name, df["Model"]))
df.insert(
0,
"Type",
list(
map(
lambda x: "Cloud" if x in cloud_model else "HuggingFace",
df["Model"],
)
),
)
df.insert(
0, "Rank", df["Average"].rank(method="min", ascending=False).astype(int)
)
df = df.sort_values(by="Rank")
tables[table] = df
return tables.values()
def main():
tables = list(create())
print("# dataframe")
print(tables[0], "\n\n")
print("# markdown")
print(tables[0].to_markdown(index=False))
if __name__ == "__main__":
main()