"""Assemble ``ccs_synthetic.feather`` from the base table and translated shards.

Steps performed, in order:

1. Read the base table from ``data/ccs_synthetic_multi.feather``.
2. Read every file found in ``data_translated/`` as a feather file and
   concatenate them into one frame.
3. Replace the base table's ``caption_multi`` column with the one from the
   concatenated shards, matching rows on the ``index`` column (left join, so
   every base row is kept).
4. Keep a fixed set of columns in a fixed order, rename ``multi_target`` to
   ``multiple_target_model``, and strip the ``https://huggingface.co/`` text
   from ``opus_mt_url``.
5. Write the result to ``ccs_synthetic.feather`` in the working directory.

All paths are relative to the current working directory.
"""
import os

import pandas as pd

TRANSLATED_DIR = "data_translated"
BASE_TABLE_PATH = "data/ccs_synthetic_multi.feather"
OUTPUT_PATH = "ccs_synthetic.feather"
HUB_URL_PREFIX = "https://huggingface.co/"

# Columns written to the output file, in output order (names as they are
# before the ``multi_target`` rename below).
OUTPUT_COLUMNS = [
    "caption",
    "caption_sv",
    "caption_multi",
    "url",
    "multi_language_code",
    "multi_language_name",
    "multi_target",
    "target_code",
    "opus_mt_url",
    "index",
]

# os.listdir() returns entries in arbitrary order; sort so the shards are
# always concatenated in the same order regardless of filesystem.
filenames = sorted(os.listdir(TRANSLATED_DIR))

df = pd.read_feather(BASE_TABLE_PATH)

# NOTE(review): every entry of TRANSLATED_DIR is read as a feather file, so a
# stray non-feather file in that directory will make read_feather fail.
df_list = [
    pd.read_feather(os.path.join(TRANSLATED_DIR, filename))
    for filename in filenames
]
df_multi = pd.concat(df_list, ignore_index=True)

# Drop the base table's own caption_multi so the merge below brings in the
# shard version without producing suffixed duplicate columns.
df = df.drop(columns="caption_multi")

# NOTE(review): if the shards contain repeated "index" values, this left join
# yields one output row per match - confirm the shards are unique on "index".
df = df.merge(df_multi[["caption_multi", "index"]], how="left", on="index")

df = df[OUTPUT_COLUMNS]
df = df.rename(columns={"multi_target": "multiple_target_model"})

# regex=False: the text to remove contains "." characters, which would act as
# wildcards on pandas versions where str.replace defaults to regex matching.
df["opus_mt_url"] = df["opus_mt_url"].str.replace(
    HUB_URL_PREFIX, "", regex=False
)

df.to_feather(OUTPUT_PATH)