File size: 8,730 Bytes
6755a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
import io
from typing import List, Dict, Tuple, BinaryIO
from copy import deepcopy

import pandas as pd
from PIL import Image
from xlsxwriter.utility import xl_col_to_name

def read_image_data(path: str, target_image_height: int) -> Tuple[BinaryIO, int, int]:
    """读取图像,按照目标高做resize处理,并转化成二进制格式,返回最终图像的宽和高

    Args:
        path (str): 图像路径
        target_image_height (int): 目标高

    Returns:
        Tuple[BinaryIO, int, int]: 图像二进制格式,返回最终图像的宽和高
    """
    image = Image.open(path)
    image_width_in_excel = int(image.width / (image.height / target_image_height))
    image = image.resize(size=(image_width_in_excel, target_image_height))
    image_byte = io.BytesIO()
    image.save(image_byte, format="png")
    return image_byte, image.width, image.height


def set_text_column_dynamic_width(worksheet, df, format, default_width=50):
    """将df在excel workshhet中的列按照实际内容长度设置列宽以及文本格式

    Args:
        worksheet (_type_): 待处理的excel worksheet
        df (_type_): worksheet中原来对应的DataFrame格式
        format (_type_): 对应列的文本格式
        default_width (int, optional): 默认目标宽度. Defaults to 50.
    """
    for column in df:
        column_width = max(df[column].astype(str).map(len).max(), len(column))
        col_idx = df.columns.get_loc(column)
        width = min(column_width, default_width)
        worksheet.set_column(col_idx, col_idx, width, format)


def convert_tasks2clean(tasks):
    tasks = [{"prompt": task["prompt"]} for task in tasks]
    return tasks


def split_tasks_by_images_lst(tasks, save_images_path_key: str = "save_images_path"):
    new_tasks = []
    for task in tasks:
        for image_path in task[save_images_path_key]:
            new_task = deepcopy(task)
            new_task[save_images_path_key] = image_path
            new_tasks.append(new_task)
    return new_tasks


def save_texts_images_2_csv(tasks: List[Dict], save_path: str):
    """存储相关结果为csv表格

    tasks (List[Dict]): 待转换的字典列表
    save_path (str): 表格存储路径
    """
    df = pd.DataFrame(tasks)
    df.to_csv(save_path, encoding="utf_8_sig", index=False)


def add_multi_data_validation(workbook, worksheet, validates, validate_idxs, n_rows):
    for i, validate in enumerate(validates):
        validate_idx = validate_idxs[i]
        worksheet = add_data_validation(
            workbook=workbook,
            worksheet=worksheet,
            col=validate_idx,
            head=validate["col_name"],
            candidates=validate["candidates"],
            colors=validate["colors"],
            n_rows=n_rows,
        )
    return worksheet


def add_data_validation(
    workbook, worksheet, col: int, head, candidates, n_rows, colors
):
    col = xl_col_to_name(col)
    # Adding the header and Datavalidation list
    worksheet.write('{}1'.format(col), head)
    colors_fmt = [workbook.add_format({'bg_color': color}) for color in colors]
    for row in range(n_rows):
        cell_idx = '{}{}'.format(col, row + 2)
        worksheet.data_validation(cell_idx, {'validate': 'list', 'source': candidates})
        for i_c in range(len(candidates)):
            worksheet.conditional_format(
                cell_idx,
                {
                    'type': 'formula',
                    'criteria': '=${}=\"{}\"'.format(cell_idx, candidates[i_c]),
                    'format': colors_fmt[i_c],
                },
            )
    return worksheet


def insert_cell_image(
    worksheet,
    row,
    col,
    image_path,
    image_height_in_table,
    text_format,
    row_ratio,
    col_ratio,
):
    image_byte, new_image_width, new_image_height = read_image_data(
        image_path, target_image_height=image_height_in_table
    )
    # TODO:现在的图像列并不是预期内的和图像等宽,而是宽了很多
    worksheet.set_column(
        col,
        col,
        int(new_image_width / col_ratio),
    )
    worksheet.insert_image(
        row,
        col,
        image_path,
        {"image_data": image_byte},
    )
    worksheet.set_row(row, int(new_image_height / row_ratio), text_format)
    return worksheet


def save_texts_images_2_excel(
    tasks: List[Dict],
    save_path: str,
    image_height_in_table: int = 120,
    row_ratio: float = 1.3,
    col_ratio: float = 5,
    validates: List = None,
):
    """将任务列表和生成的图像统一存储在表格中,方便观看对比实验结果。

    Args:
        tasks (List[Dict]): 待转换的字典列表
        save_path (str): 表格存储路径
        image_height_in_table (int, optional): 表格中缩略图的高. Defaults to 120.
        row_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 1.2.
        col_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 7.5.
        need_add_checker_column (bool, optional): 是否新增一列用于审核检查状态. Defaults to False.
    """
    df = pd.DataFrame(tasks)
    # 先找到需要插入图像的列,插入图像列
    keys_with_image = [
        k for k in tasks[0].keys() if "images_path" in k and k != "save_images_path"
    ]
    high_priority_col_idx = 0
    # 默认save_images_path是生成图像,放在后面
    if "save_images_path" in tasks[0]:
        keys_with_image.append("save_images_path")
    for img_key in keys_with_image:
        maxlen_img_key_value = max(
            [
                len(task[img_key]) if isinstance(task[img_key], list) else 1
                for task in tasks
            ]
        )
        for i in range(maxlen_img_key_value):
            column = "{}_{}".format(img_key, i)
            if column not in df.columns:
                df.insert(
                    loc=high_priority_col_idx,
                    column=column,
                    value="",
                )
                high_priority_col_idx += 1

    validate_start_idx = high_priority_col_idx
    if validates is not None:
        for i, validate in enumerate(validates):
            if validate["col_name"] not in df.columns:
                col_idx = validate_start_idx + i
                df.insert(loc=col_idx, column=validate["col_name"], value="")
        validate_idxs = range(validate_start_idx, validate_start_idx + len(validates))
    writer = pd.ExcelWriter(save_path, engine="xlsxwriter")
    # Convert the dataframe to an XlsxWriter Excel object.
    df.to_excel(writer, sheet_name="Sheet1", index=False)
    # Get the xlsxwriter workbook and worksheet objects.
    workbook = writer.book
    worksheet = writer.sheets["Sheet1"]
    n_col = len(df.columns)
    n_row = len(df)
    text_format = workbook.add_format({"text_wrap": True})
    set_text_column_dynamic_width(worksheet=worksheet, df=df, format=text_format)
    # Insert an image.
    for row in range(len(df)):
        task = tasks[row]
        cell_col = 0
        for im_idx, img_key in enumerate(keys_with_image):
            images_path = task[img_key]
            if not isinstance(images_path, list):
                cell_row = 1 + row
                worksheet.write(cell_row, cell_col, img_key)
                if len(images_path) == 0:
                    continue
                worksheet = insert_cell_image(
                    worksheet=worksheet,
                    row=cell_row,
                    col=cell_col,
                    image_path=images_path,
                    image_height_in_table=image_height_in_table,
                    text_format=text_format,
                    row_ratio=row_ratio,
                    col_ratio=col_ratio,
                )
                cell_col += 1
            else:
                for i, image_path in enumerate(images_path):
                    worksheet = insert_cell_image(
                        worksheet=worksheet,
                        row=cell_row,
                        col=cell_col,
                        image_path=image_path,
                        image_height_in_table=image_height_in_table,
                        text_format=text_format,
                        row_ratio=row_ratio,
                        col_ratio=col_ratio,
                    )
                    cell_col += 1
    if validates is not None:
        worksheet = add_multi_data_validation(
            workbook,
            worksheet,
            validates=validates,
            validate_idxs=validate_idxs,
            n_rows=len(df),
        )
    # Close the Pandas Excel writer and output the Excel file.
    writer.save()