Spaces:

kevinwang676
/

MuseV-test

No application file

App Files Files Community

MuseV-test / mmcm /utils /table_util.py

kevinwang676

Upload folder using huggingface_hub

6755a2d verified 11 months ago

raw

history blame contribute delete

8.73 kB

	import io
	from typing import List, Dict, Tuple, BinaryIO
	from copy import deepcopy

	import pandas as pd
	from PIL import Image
	from xlsxwriter.utility import xl_col_to_name

	def read_image_data(path: str, target_image_height: int) -> Tuple[BinaryIO, int, int]:
	"""读取图像，按照目标高做resize处理，并转化成二进制格式，返回最终图像的宽和高

	Args:
	path (str): 图像路径
	target_image_height (int): 目标高

	Returns:
	Tuple[BinaryIO, int, int]: 图像二进制格式，返回最终图像的宽和高
	"""
	image = Image.open(path)
	image_width_in_excel = int(image.width / (image.height / target_image_height))
	image = image.resize(size=(image_width_in_excel, target_image_height))
	image_byte = io.BytesIO()
	image.save(image_byte, format="png")
	return image_byte, image.width, image.height


	def set_text_column_dynamic_width(worksheet, df, format, default_width=50):
	"""将df在excel workshhet中的列按照实际内容长度设置列宽以及文本格式

	Args:
	worksheet (_type_): 待处理的excel worksheet
	df (_type_): worksheet中原来对应的DataFrame格式
	format (_type_): 对应列的文本格式
	default_width (int, optional): 默认目标宽度. Defaults to 50.
	"""
	for column in df:
	column_width = max(df[column].astype(str).map(len).max(), len(column))
	col_idx = df.columns.get_loc(column)
	width = min(column_width, default_width)
	worksheet.set_column(col_idx, col_idx, width, format)


	def convert_tasks2clean(tasks):
	tasks = [{"prompt": task["prompt"]} for task in tasks]
	return tasks


	def split_tasks_by_images_lst(tasks, save_images_path_key: str = "save_images_path"):
	new_tasks = []
	for task in tasks:
	for image_path in task[save_images_path_key]:
	new_task = deepcopy(task)
	new_task[save_images_path_key] = image_path
	new_tasks.append(new_task)
	return new_tasks


	def save_texts_images_2_csv(tasks: List[Dict], save_path: str):
	"""存储相关结果为csv表格

	tasks (List[Dict]): 待转换的字典列表
	save_path (str): 表格存储路径
	"""
	df = pd.DataFrame(tasks)
	df.to_csv(save_path, encoding="utf_8_sig", index=False)


	def add_multi_data_validation(workbook, worksheet, validates, validate_idxs, n_rows):
	for i, validate in enumerate(validates):
	validate_idx = validate_idxs[i]
	worksheet = add_data_validation(
	workbook=workbook,
	worksheet=worksheet,
	col=validate_idx,
	head=validate["col_name"],
	candidates=validate["candidates"],
	colors=validate["colors"],
	n_rows=n_rows,
	)
	return worksheet


	def add_data_validation(
	workbook, worksheet, col: int, head, candidates, n_rows, colors
	):
	col = xl_col_to_name(col)
	# Adding the header and Datavalidation list
	worksheet.write('{}1'.format(col), head)
	colors_fmt = [workbook.add_format({'bg_color': color}) for color in colors]
	for row in range(n_rows):
	cell_idx = '{}{}'.format(col, row + 2)
	worksheet.data_validation(cell_idx, {'validate': 'list', 'source': candidates})
	for i_c in range(len(candidates)):
	worksheet.conditional_format(
	cell_idx,
	{
	'type': 'formula',
	'criteria': '=${}=\"{}\"'.format(cell_idx, candidates[i_c]),
	'format': colors_fmt[i_c],
	},
	)
	return worksheet


	def insert_cell_image(
	worksheet,
	row,
	col,
	image_path,
	image_height_in_table,
	text_format,
	row_ratio,
	col_ratio,
	):
	image_byte, new_image_width, new_image_height = read_image_data(
	image_path, target_image_height=image_height_in_table
	)
	# TODO：现在的图像列并不是预期内的和图像等宽，而是宽了很多
	worksheet.set_column(
	col,
	col,
	int(new_image_width / col_ratio),
	)
	worksheet.insert_image(
	row,
	col,
	image_path,
	{"image_data": image_byte},
	)
	worksheet.set_row(row, int(new_image_height / row_ratio), text_format)
	return worksheet


	def save_texts_images_2_excel(
	tasks: List[Dict],
	save_path: str,
	image_height_in_table: int = 120,
	row_ratio: float = 1.3,
	col_ratio: float = 5,
	validates: List = None,
	):
	"""将任务列表和生成的图像统一存储在表格中，方便观看对比实验结果。

	Args:
	tasks (List[Dict]): 待转换的字典列表
	save_path (str): 表格存储路径
	image_height_in_table (int, optional): 表格中缩略图的高. Defaults to 120.
	row_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 1.2.
	col_ratio (float, optional): excel的单元格宽高和实际图像边长需要做比例转换. Defaults to 7.5.
	need_add_checker_column (bool, optional): 是否新增一列用于审核检查状态. Defaults to False.
	"""
	df = pd.DataFrame(tasks)
	# 先找到需要插入图像的列，插入图像列
	keys_with_image = [
	k for k in tasks[0].keys() if "images_path" in k and k != "save_images_path"
	]
	high_priority_col_idx = 0
	# 默认save_images_path是生成图像，放在后面
	if "save_images_path" in tasks[0]:
	keys_with_image.append("save_images_path")
	for img_key in keys_with_image:
	maxlen_img_key_value = max(
	[
	len(task[img_key]) if isinstance(task[img_key], list) else 1
	for task in tasks
	]
	)
	for i in range(maxlen_img_key_value):
	column = "{}_{}".format(img_key, i)
	if column not in df.columns:
	df.insert(
	loc=high_priority_col_idx,
	column=column,
	value="",
	)
	high_priority_col_idx += 1

	validate_start_idx = high_priority_col_idx
	if validates is not None:
	for i, validate in enumerate(validates):
	if validate["col_name"] not in df.columns:
	col_idx = validate_start_idx + i
	df.insert(loc=col_idx, column=validate["col_name"], value="")
	validate_idxs = range(validate_start_idx, validate_start_idx + len(validates))
	writer = pd.ExcelWriter(save_path, engine="xlsxwriter")
	# Convert the dataframe to an XlsxWriter Excel object.
	df.to_excel(writer, sheet_name="Sheet1", index=False)
	# Get the xlsxwriter workbook and worksheet objects.
	workbook = writer.book
	worksheet = writer.sheets["Sheet1"]
	n_col = len(df.columns)
	n_row = len(df)
	text_format = workbook.add_format({"text_wrap": True})
	set_text_column_dynamic_width(worksheet=worksheet, df=df, format=text_format)
	# Insert an image.
	for row in range(len(df)):
	task = tasks[row]
	cell_col = 0
	for im_idx, img_key in enumerate(keys_with_image):
	images_path = task[img_key]
	if not isinstance(images_path, list):
	cell_row = 1 + row
	worksheet.write(cell_row, cell_col, img_key)
	if len(images_path) == 0:
	continue
	worksheet = insert_cell_image(
	worksheet=worksheet,
	row=cell_row,
	col=cell_col,
	image_path=images_path,
	image_height_in_table=image_height_in_table,
	text_format=text_format,
	row_ratio=row_ratio,
	col_ratio=col_ratio,
	)
	cell_col += 1
	else:
	for i, image_path in enumerate(images_path):
	worksheet = insert_cell_image(
	worksheet=worksheet,
	row=cell_row,
	col=cell_col,
	image_path=image_path,
	image_height_in_table=image_height_in_table,
	text_format=text_format,
	row_ratio=row_ratio,
	col_ratio=col_ratio,
	)
	cell_col += 1
	if validates is not None:
	worksheet = add_multi_data_validation(
	workbook,
	worksheet,
	validates=validates,
	validate_idxs=validate_idxs,
	n_rows=len(df),
	)
	# Close the Pandas Excel writer and output the Excel file.
	writer.save()