Spaces:
No application file
No application file
import io | |
from typing import List, Dict, Tuple, BinaryIO | |
from copy import deepcopy | |
import pandas as pd | |
from PIL import Image | |
from xlsxwriter.utility import xl_col_to_name | |
def read_image_data(path: str, target_image_height: int) -> Tuple[BinaryIO, int, int]: | |
"""读取图像,按照目标高做resize处理,并转化成二进制格式,返回最终图像的宽和高 | |
Args: | |
path (str): 图像路径 | |
target_image_height (int): 目标高 | |
Returns: | |
Tuple[BinaryIO, int, int]: 图像二进制格式,返回最终图像的宽和高 | |
""" | |
image = Image.open(path) | |
image_width_in_excel = int(image.width / (image.height / target_image_height)) | |
image = image.resize(size=(image_width_in_excel, target_image_height)) | |
image_byte = io.BytesIO() | |
image.save(image_byte, format="png") | |
return image_byte, image.width, image.height | |
def set_text_column_dynamic_width(worksheet, df, format, default_width=50): | |
"""将df在excel workshhet中的列按照实际内容长度设置列宽以及文本格式 | |
Args: | |
worksheet (_type_): 待处理的excel worksheet | |
df (_type_): worksheet中原来对应的DataFrame格式 | |
format (_type_): 对应列的文本格式 | |
default_width (int, optional): 默认目标宽度. Defaults to 50. | |
""" | |
for column in df: | |
column_width = max(df[column].astype(str).map(len).max(), len(column)) | |
col_idx = df.columns.get_loc(column) | |
width = min(column_width, default_width) | |
worksheet.set_column(col_idx, col_idx, width, format) | |
def convert_tasks2clean(tasks): | |
tasks = [{"prompt": task["prompt"]} for task in tasks] | |
return tasks | |
def split_tasks_by_images_lst(tasks, save_images_path_key: str = "save_images_path"): | |
new_tasks = [] | |
for task in tasks: | |
for image_path in task[save_images_path_key]: | |
new_task = deepcopy(task) | |
new_task[save_images_path_key] = image_path | |
new_tasks.append(new_task) | |
return new_tasks | |
def save_texts_images_2_csv(tasks: List[Dict], save_path: str): | |
"""存储相关结果为csv表格 | |
tasks (List[Dict]): 待转换的字典列表 | |
save_path (str): 表格存储路径 | |
""" | |
df = pd.DataFrame(tasks) | |
df.to_csv(save_path, encoding="utf_8_sig", index=False) | |
def add_multi_data_validation(workbook, worksheet, validates, validate_idxs, n_rows): | |
for i, validate in enumerate(validates): | |
validate_idx = validate_idxs[i] | |
worksheet = add_data_validation( | |
workbook=workbook, | |
worksheet=worksheet, | |
col=validate_idx, | |
head=validate["col_name"], | |
candidates=validate["candidates"], | |
colors=validate["colors"], | |
n_rows=n_rows, | |
) | |
return worksheet | |
def add_data_validation( | |
workbook, worksheet, col: int, head, candidates, n_rows, colors | |
): | |
col = xl_col_to_name(col) | |
# Adding the header and Datavalidation list | |
worksheet.write('{}1'.format(col), head) | |
colors_fmt = [workbook.add_format({'bg_color': color}) for color in colors] | |
for row in range(n_rows): | |
cell_idx = '{}{}'.format(col, row + 2) | |
worksheet.data_validation(cell_idx, {'validate': 'list', 'source': candidates}) | |
for i_c in range(len(candidates)): | |
worksheet.conditional_format( | |
cell_idx, | |
{ | |
'type': 'formula', | |
'criteria': '=${}=\"{}\"'.format(cell_idx, candidates[i_c]), | |
'format': colors_fmt[i_c], | |
}, | |
) | |
return worksheet | |
def insert_cell_image( | |
worksheet, | |
row, | |
col, | |
image_path, | |
image_height_in_table, | |
text_format, | |
row_ratio, | |
col_ratio, | |
): | |
image_byte, new_image_width, new_image_height = read_image_data( | |
image_path, target_image_height=image_height_in_table | |
) | |
# TODO:现在的图像列并不是预期内的和图像等宽,而是宽了很多 | |
worksheet.set_column( | |
col, | |
col, | |
int(new_image_width / col_ratio), | |
) | |
worksheet.insert_image( | |
row, | |
col, | |
image_path, | |
{"image_data": image_byte}, | |
) | |
worksheet.set_row(row, int(new_image_height / row_ratio), text_format) | |
return worksheet | |
def save_texts_images_2_excel( | |
tasks: List[Dict], | |
save_path: str, | |
image_height_in_table: int = 120, | |
row_ratio: float = 1.3, | |
col_ratio: float = 5, | |
validates: List = None, | |
): | |
"""将任务列表和生成的图像统一存储在表格中,方便观看对比实验结果。 | |
Args: | |
tasks (List[Dict]): 待转换的字典列表 | |
save_path (str): 表格存储路径 | |
image_height_in_table (int, optional): 表格中缩略图的高. Defaults to 120. | |
row_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 1.2. | |
col_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 7.5. | |
need_add_checker_column (bool, optional): 是否新增一列用于审核检查状态. Defaults to False. | |
""" | |
df = pd.DataFrame(tasks) | |
# 先找到需要插入图像的列,插入图像列 | |
keys_with_image = [ | |
k for k in tasks[0].keys() if "images_path" in k and k != "save_images_path" | |
] | |
high_priority_col_idx = 0 | |
# 默认save_images_path是生成图像,放在后面 | |
if "save_images_path" in tasks[0]: | |
keys_with_image.append("save_images_path") | |
for img_key in keys_with_image: | |
maxlen_img_key_value = max( | |
[ | |
len(task[img_key]) if isinstance(task[img_key], list) else 1 | |
for task in tasks | |
] | |
) | |
for i in range(maxlen_img_key_value): | |
column = "{}_{}".format(img_key, i) | |
if column not in df.columns: | |
df.insert( | |
loc=high_priority_col_idx, | |
column=column, | |
value="", | |
) | |
high_priority_col_idx += 1 | |
validate_start_idx = high_priority_col_idx | |
if validates is not None: | |
for i, validate in enumerate(validates): | |
if validate["col_name"] not in df.columns: | |
col_idx = validate_start_idx + i | |
df.insert(loc=col_idx, column=validate["col_name"], value="") | |
validate_idxs = range(validate_start_idx, validate_start_idx + len(validates)) | |
writer = pd.ExcelWriter(save_path, engine="xlsxwriter") | |
# Convert the dataframe to an XlsxWriter Excel object. | |
df.to_excel(writer, sheet_name="Sheet1", index=False) | |
# Get the xlsxwriter workbook and worksheet objects. | |
workbook = writer.book | |
worksheet = writer.sheets["Sheet1"] | |
n_col = len(df.columns) | |
n_row = len(df) | |
text_format = workbook.add_format({"text_wrap": True}) | |
set_text_column_dynamic_width(worksheet=worksheet, df=df, format=text_format) | |
# Insert an image. | |
for row in range(len(df)): | |
task = tasks[row] | |
cell_col = 0 | |
for im_idx, img_key in enumerate(keys_with_image): | |
images_path = task[img_key] | |
if not isinstance(images_path, list): | |
cell_row = 1 + row | |
worksheet.write(cell_row, cell_col, img_key) | |
if len(images_path) == 0: | |
continue | |
worksheet = insert_cell_image( | |
worksheet=worksheet, | |
row=cell_row, | |
col=cell_col, | |
image_path=images_path, | |
image_height_in_table=image_height_in_table, | |
text_format=text_format, | |
row_ratio=row_ratio, | |
col_ratio=col_ratio, | |
) | |
cell_col += 1 | |
else: | |
for i, image_path in enumerate(images_path): | |
worksheet = insert_cell_image( | |
worksheet=worksheet, | |
row=cell_row, | |
col=cell_col, | |
image_path=image_path, | |
image_height_in_table=image_height_in_table, | |
text_format=text_format, | |
row_ratio=row_ratio, | |
col_ratio=col_ratio, | |
) | |
cell_col += 1 | |
if validates is not None: | |
worksheet = add_multi_data_validation( | |
workbook, | |
worksheet, | |
validates=validates, | |
validate_idxs=validate_idxs, | |
n_rows=len(df), | |
) | |
# Close the Pandas Excel writer and output the Excel file. | |
writer.save() | |