Spaces:

kevinwang676
/

MuseV-test

No application file

File size: 8,730 Bytes

6755a2d

import io
from typing import List, Dict, Tuple, BinaryIO
from copy import deepcopy

import pandas as pd
from PIL import Image
from xlsxwriter.utility import xl_col_to_name

def read_image_data(path: str, target_image_height: int) -> Tuple[BinaryIO, int, int]:
    """读取图像，按照目标高做resize处理，并转化成二进制格式，返回最终图像的宽和高

    Args:
        path (str): 图像路径
        target_image_height (int): 目标高

    Returns:
        Tuple[BinaryIO, int, int]: 图像二进制格式，返回最终图像的宽和高
    """
    image = Image.open(path)
    image_width_in_excel = int(image.width / (image.height / target_image_height))
    image = image.resize(size=(image_width_in_excel, target_image_height))
    image_byte = io.BytesIO()
    image.save(image_byte, format="png")
    return image_byte, image.width, image.height


def set_text_column_dynamic_width(worksheet, df, format, default_width=50):
    """将df在excel workshhet中的列按照实际内容长度设置列宽以及文本格式

    Args:
        worksheet (_type_): 待处理的excel worksheet
        df (_type_): worksheet中原来对应的DataFrame格式
        format (_type_): 对应列的文本格式
        default_width (int, optional): 默认目标宽度. Defaults to 50.
    """
    for column in df:
        column_width = max(df[column].astype(str).map(len).max(), len(column))
        col_idx = df.columns.get_loc(column)
        width = min(column_width, default_width)
        worksheet.set_column(col_idx, col_idx, width, format)


def convert_tasks2clean(tasks):
    tasks = [{"prompt": task["prompt"]} for task in tasks]
    return tasks


def split_tasks_by_images_lst(tasks, save_images_path_key: str = "save_images_path"):
    new_tasks = []
    for task in tasks:
        for image_path in task[save_images_path_key]:
            new_task = deepcopy(task)
            new_task[save_images_path_key] = image_path
            new_tasks.append(new_task)
    return new_tasks


def save_texts_images_2_csv(tasks: List[Dict], save_path: str):
    """存储相关结果为csv表格

    tasks (List[Dict]): 待转换的字典列表
    save_path (str): 表格存储路径
    """
    df = pd.DataFrame(tasks)
    df.to_csv(save_path, encoding="utf_8_sig", index=False)


def add_multi_data_validation(workbook, worksheet, validates, validate_idxs, n_rows):
    for i, validate in enumerate(validates):
        validate_idx = validate_idxs[i]
        worksheet = add_data_validation(
            workbook=workbook,
            worksheet=worksheet,
            col=validate_idx,
            head=validate["col_name"],
            candidates=validate["candidates"],
            colors=validate["colors"],
            n_rows=n_rows,
        )
    return worksheet


def add_data_validation(
    workbook, worksheet, col: int, head, candidates, n_rows, colors
):
    col = xl_col_to_name(col)
    # Adding the header and Datavalidation list
    worksheet.write('{}1'.format(col), head)
    colors_fmt = [workbook.add_format({'bg_color': color}) for color in colors]
    for row in range(n_rows):
        cell_idx = '{}{}'.format(col, row + 2)
        worksheet.data_validation(cell_idx, {'validate': 'list', 'source': candidates})
        for i_c in range(len(candidates)):
            worksheet.conditional_format(
                cell_idx,
                {
                    'type': 'formula',
                    'criteria': '=${}=\"{}\"'.format(cell_idx, candidates[i_c]),
                    'format': colors_fmt[i_c],
                },
            )
    return worksheet


def insert_cell_image(
    worksheet,
    row,
    col,
    image_path,
    image_height_in_table,
    text_format,
    row_ratio,
    col_ratio,
):
    image_byte, new_image_width, new_image_height = read_image_data(
        image_path, target_image_height=image_height_in_table
    )
    # TODO：现在的图像列并不是预期内的和图像等宽，而是宽了很多
    worksheet.set_column(
        col,
        col,
        int(new_image_width / col_ratio),
    )
    worksheet.insert_image(
        row,
        col,
        image_path,
        {"image_data": image_byte},
    )
    worksheet.set_row(row, int(new_image_height / row_ratio), text_format)
    return worksheet


def save_texts_images_2_excel(
    tasks: List[Dict],
    save_path: str,
    image_height_in_table: int = 120,
    row_ratio: float = 1.3,
    col_ratio: float = 5,
    validates: List = None,
):
    """将任务列表和生成的图像统一存储在表格中，方便观看对比实验结果。

    Args:
        tasks (List[Dict]): 待转换的字典列表
        save_path (str): 表格存储路径
        image_height_in_table (int, optional): 表格中缩略图的高. Defaults to 120.
        row_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 1.2.
        col_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 7.5.
        need_add_checker_column (bool, optional): 是否新增一列用于审核检查状态. Defaults to False.
    """
    df = pd.DataFrame(tasks)
    # 先找到需要插入图像的列，插入图像列
    keys_with_image = [
        k for k in tasks[0].keys() if "images_path" in k and k != "save_images_path"
    ]
    high_priority_col_idx = 0
    # 默认save_images_path是生成图像，放在后面
    if "save_images_path" in tasks[0]:
        keys_with_image.append("save_images_path")
    for img_key in keys_with_image:
        maxlen_img_key_value = max(
            [
                len(task[img_key]) if isinstance(task[img_key], list) else 1
                for task in tasks
            ]
        )
        for i in range(maxlen_img_key_value):
            column = "{}_{}".format(img_key, i)
            if column not in df.columns:
                df.insert(
                    loc=high_priority_col_idx,
                    column=column,
                    value="",
                )
                high_priority_col_idx += 1

    validate_start_idx = high_priority_col_idx
    if validates is not None:
        for i, validate in enumerate(validates):
            if validate["col_name"] not in df.columns:
                col_idx = validate_start_idx + i
                df.insert(loc=col_idx, column=validate["col_name"], value="")
        validate_idxs = range(validate_start_idx, validate_start_idx + len(validates))
    writer = pd.ExcelWriter(save_path, engine="xlsxwriter")
    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name="Sheet1", index=False)
    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]
    n_col = len(df.columns)
    n_row = len(df)
    text_format = workbook.add_format({"text_wrap": True})
    set_text_column_dynamic_width(worksheet=worksheet, df=df, format=text_format)
    # Insert an image.
    for row in range(len(df)):
        task = tasks[row]
        cell_col = 0
        for im_idx, img_key in enumerate(keys_with_image):
            images_path = task[img_key]
            if not isinstance(images_path, list):
                cell_row = 1 + row
                worksheet.write(cell_row, cell_col, img_key)
                if len(images_path) == 0:
                    continue
                worksheet = insert_cell_image(
                    worksheet=worksheet,
                    row=cell_row,
                    col=cell_col,
                    image_path=images_path,
                    image_height_in_table=image_height_in_table,
                    text_format=text_format,
                    row_ratio=row_ratio,
                    col_ratio=col_ratio,
                )
                cell_col += 1
            else:
                for i, image_path in enumerate(images_path):
                    worksheet = insert_cell_image(
                        worksheet=worksheet,
                        row=cell_row,
                        col=cell_col,
                        image_path=image_path,
                        image_height_in_table=image_height_in_table,
                        text_format=text_format,
                        row_ratio=row_ratio,
                        col_ratio=col_ratio,
                    )
                    cell_col += 1
    if validates is not None:
        worksheet = add_multi_data_validation(
            workbook,
            worksheet,
            validates=validates,
            validate_idxs=validate_idxs,
            n_rows=len(df),
        )
    # Close the Pandas Excel writer and output the Excel file.
    writer.save()