from __future__ import annotations

from typing import Any, Dict, List

import numpy as np

from ...data import Item, Items
from ...utils.util import convert_class_attr_to_dict
from .vision_object import Objects
from .shot_size import cal_shot_size_by_face


# Structure definition: VideoMashup/videomashup/data_structure/vision_data_structure.py Frame
class Frame(Item):
    def __init__(
        self,
        frame_idx: int,
        objs: Objects = None,
        scene: str = None,
        caption: str = None,
        shot_size: str = None,
        shot_composition: str = None,
        camera_angle: str = None,
        field_depth: str = None,
        content_width=None,
        content_height=None,
        **kwargs,
    ) -> None:
        """A single video frame and its visual attributes.

        Args:
            frame_idx (int): Frame index.
            objs (Objects, optional): Detected objects. Defaults to None.
            scene (str, optional): Scene label, e.g. sky, airport. Defaults to None.
            caption (str, optional): Text description of the frame. Defaults to None.
            shot_size (str, optional): Shot size. Defaults to None.
            shot_composition (str, optional): Shot composition. Defaults to None.
            camera_angle (str, optional): Camera angle. Defaults to None.
            field_depth (str, optional): Depth of field. Defaults to None.
            content_width (int, optional): Width of the frame content. Defaults to None.
            content_height (int, optional): Height of the frame content. Defaults to None.
        """
        self.frame_idx = frame_idx
        self.objs = objs if isinstance(objs, Objects) else Objects(objs)
        self.scene = scene
        self.caption = caption
        self.shot_size = shot_size
        self.shot_composition = shot_composition
        self.camera_angle = camera_angle
        self.field_depth = field_depth
        self.content_height = content_height
        self.content_width = content_width
        self.__dict__.update(**kwargs)
        self.preprocess()

    def preprocess(self):
        if (
            self.shot_size is None
            and self.content_height is not None
            and self.content_width is not None
        ):
            self.shot_size = self.cal_shot_size()

    def cal_shot_size(self):
        """Estimate the shot size of the current frame, currently from face information.

        Returns:
            str: Shot size label; see VideoMashup/videomashup/data_structure/vision_data_structure.py.
        """
        if len(self.objs.objs) > 0:
            obj = self.objs.get_max_bbox_obj()
            shot_size = cal_shot_size_by_face(
                frame_width=self.content_width,
                frame_height=self.content_height,
                obj=obj,
            )
        else:
            shot_size = "ExtremeWideShot"
        return shot_size

    def timestamp(self):
        # fps is not set in __init__; it is expected to arrive via **kwargs
        # (e.g. Frame(..., fps=25)).
        timestamp = self.frame_idx / self.fps
        return timestamp

    def to_dct(self, target_keys: List[str] = None, ignored_keys: List[str] = None):
        # objs is always excluded from the generic serialization and re-added
        # through its own to_dct().
        dct = super().to_dct(target_keys, ignored_keys=["objs"])
        dct["objs"] = self.objs.to_dct()
        return dct
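

# Illustrative usage sketch (not part of the original module): it shows how a Frame
# can be built and how timestamp() relies on an fps value, which is assumed to be
# supplied through **kwargs since __init__ does not set it. The attribute values and
# the shot_size label below are made up for demonstration only.
def _example_frame_usage():
    frame = Frame(
        frame_idx=120,
        scene="airport",
        caption="a person walking through an airport",
        shot_size="ExtremeWideShot",  # given explicitly, so preprocess() keeps it
        content_width=1920,
        content_height=1080,
        fps=25,  # stored via **kwargs; required by timestamp()
    )
    return frame.timestamp()  # 120 / 25 -> 4.8 seconds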


def get_width_center_by_topkrole(
    objs: dict,
    coord_offset=None,
) -> float:
    """Compute a horizontal crop center from the person detections in a shot.

    Args:
        objs (dict): Detections keyed by role id.
        coord_offset (list, optional): Offset between the original video coordinates
            and the detection coordinates; pass it to correct the bboxes if the two
            differ. Defaults to None.

    Returns:
        float: Horizontal (x-axis) center point.
    """
    if coord_offset is None:
        coord_offset = [0, 0]
    min_roleid = str(min([int(x) for x in objs.keys()]))
    target_role = objs[min_roleid]
    bbox = [target_role["bbox"][x][0] for x in sorted(target_role["bbox"].keys())]
    target_idx = int(len(bbox) // 2)
    target_bbox = bbox[target_idx]
    target_bbox = [
        target_bbox[0] - coord_offset[0],
        target_bbox[1] - coord_offset[1],
        target_bbox[2] - coord_offset[0],
        target_bbox[3] - coord_offset[1],
    ]
    target_center_x = (target_bbox[0] + target_bbox[2]) / 2
    return target_center_x
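

# Illustrative sketch (the data layout is an assumption inferred from the indexing
# above): `objs` maps a role id string to per-frame bboxes, each frame entry being a
# list whose first element is [x1, y1, x2, y2]. The numbers are made up.
def _example_width_center():
    objs = {
        "0": {"bbox": {"12": [[100, 50, 300, 400]], "13": [[110, 52, 310, 402]]}},
        "3": {"bbox": {"12": [[500, 60, 620, 380]]}},
    }
    # Role "0" has the smallest id, so its middle-frame bbox drives the result.
    return get_width_center_by_topkrole(objs, coord_offset=[0, 0])  # -> 210.0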


def get_time_center_by_topkrole(
    objs: dict,
) -> float:
    """Compute the temporal center of the top-ranked role, used to cut a temporal
    sub-clip out of the original segment instead of expanding outward from the
    middle by default.

    Args:
        objs (dict): Roles with per-frame (timestamped) bbox information.

    Returns:
        float: Center frame index (mean of the frame indices where the role appears).
    """
    min_roleid = str(min([int(x) for x in objs.keys()]))
    target_role = objs[min_roleid]
    frame_idxs = [int(x) for x in target_role["bbox"].keys()]
    frame_idx = np.mean(frame_idxs)
    return frame_idx
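

# Illustrative sketch using the same assumed `objs` layout as above: the result is
# simply the mean of the frame indices in which the top-ranked role appears.
def _example_time_center():
    objs = {"0": {"bbox": {"12": [[100, 50, 300, 400]], "14": [[110, 52, 310, 402]]}}}
    return get_time_center_by_topkrole(objs)  # -> 13.0 (mean of frames 12 and 14)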


class FrameSeq(Items):
    def __init__(self, frameseq: Any = None, **kwargs):
        super().__init__(frameseq)
        self.frameseq = self.data
        self.__dict__.update(**kwargs)

    @classmethod
    def from_data(
        cls, datas: List[Any], frame_kwargs: Dict = None, **kwargs
    ) -> FrameSeq:
        if frame_kwargs is None:
            frame_kwargs = {}
        # Each element of datas is passed positionally to Frame (as frame_idx).
        frameseq = [Frame(data, **frame_kwargs) for data in datas]
        return FrameSeq(frameseq=frameseq, **kwargs)
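

# Minimal, hypothetical usage sketch for FrameSeq.from_data: it assumes each element
# of `datas` is the value handed positionally to Frame (i.e. the frame index) and
# that shared per-frame attributes travel through frame_kwargs. Run as a module
# (python -m) so the relative imports resolve.
if __name__ == "__main__":
    seq = FrameSeq.from_data(
        [0, 25, 50],
        frame_kwargs={"shot_size": "ExtremeWideShot", "fps": 25},
    )
    print([frame.frame_idx for frame in seq.frameseq])  # [0, 25, 50]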