Spaces:
No application file
No application file
File size: 4,995 Bytes
6755a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
from __future__ import annotations
from typing import Any, Dict, List
import numpy as np
from ...data import Item, Items
from ...utils.util import convert_class_attr_to_dict
from .vision_object import Objects
from .shot_size import cal_shot_size_by_face
# 结构体定义 VideoMashup/videomashup/data_structure/vision_data_structure.py Frame
class Frame(Item):
def __init__(
self,
frame_idx: int,
objs: Objects = None,
scene: str = None,
caption: str = None,
shot_size: str = None,
shot_composition: str = None,
camera_angle: str = None,
field_depth: str = None,
content_width=None,
content_height=None,
**kwargs,
) -> None:
"""_summary_
Args:
frame_idx (int): 帧序号
objs (Objects, optional): 检测到的物体. Defaults to None.
scene (str, optional): 场景,天空、机场等. Defaults to None.
caption (str, optional): 文本描述. Defaults to None.
shot_size (str, optional): 景别. Defaults to None.
shot_composition (str, optional): 构图. Defaults to None.
camera_angle (str, optional): 相机角度. Defaults to None.
field_depth (str, optional): 景深. Defaults to None.
"""
self.frame_idx = frame_idx
self.objs = objs if isinstance(objs, Objects) else Objects(objs)
self.scene = scene
self.caption = caption
self.shot_size = shot_size
self.shot_composition = shot_composition
self.camera_angle = camera_angle
self.field_depth = field_depth
self.content_height = content_height
self.content_width = content_width
self.__dict__.update(**kwargs)
self.preprocess()
def preprocess(self):
if (
self.shot_size is None
and self.content_height is not None
and self.content_width is not None
):
self.shot_size = self.cal_shot_size()
def cal_shot_size(
self,
):
"""计算当前帧的景别,目前使用人脸信息计算
Returns:
str: 景别,参考 VideoMashup/videomashup/data_structure/vision_data_structure.py
"""
if len(self.objs.objs) > 0:
obj = self.objs.get_max_bbox_obj()
shot_size = cal_shot_size_by_face(
frame_width=self.content_width,
frame_height=self.content_height,
obj=obj,
)
else:
shot_size = "ExtremeWideShot"
return shot_size
@property
def timestamp(self):
timestamp = self.frame_idx / self.fps
return timestamp
def to_dct(self, target_keys: List[str] = None, ignored_keys: List[str] = None):
dct = super().to_dct(target_keys, ignored_keys=["objs"])
dct["objs"] = self.objs.to_dct()
return dct
def get_width_center_by_topkrole(
objs: list,
coord_offset=None,
) -> float:
"""通过视频镜头中的人物目标信息 计算适合剪辑的横轴中心点
Args:
objs (list): 目标信息
coord_offset (list, optional): 原视频的坐标和检测目标的坐标信息可能存在偏移,如有可使用该偏移矫正. Defaults to None.
Returns:
float: 横轴中心点
"""
if coord_offset is None:
coord_offset = [0, 0]
min_roleid = str(min([int(x) for x in objs.keys()]))
target_role = objs[min_roleid]
bbox = [target_role["bbox"][x][0] for x in sorted(target_role["bbox"].keys())]
target_idx = int(len(bbox) // 2)
target_bbox = bbox[target_idx]
target_bbox = [
target_bbox[0] - coord_offset[0],
target_bbox[1] - coord_offset[1],
target_bbox[2] - coord_offset[0],
target_bbox[3] - coord_offset[1],
]
target_center_x = (target_bbox[0] + target_bbox[2]) / 2
return target_center_x
def get_time_center_by_topkrole(
objs: list,
) -> float:
"""计算主要目标人物的中心时间戳,适用于从原片段裁剪时序上的子片段,替代默认中间向两边
Args:
objs (list): 有时间戳信息的目标人物列表
Returns:
float: 中心时间戳
"""
min_roleid = str(min([int(x) for x in objs.keys()]))
target_role = objs[min_roleid]
frame_idxs = [int(x) for x in target_role["bbox"].keys()]
frame_idx = np.mean(frame_idxs)
return frame_idx
class FrameSeq(Items):
def __init__(self, frameseq: Any = None, **kwargs):
super().__init__(frameseq)
self.frameseq = self.data
self.__dict__.update(**kwargs)
@classmethod
def from_data(
cls, datas: List[Frame], frame_kwargs: Dict = None, **kwargs
) -> FrameSeq:
if frame_kwargs is None:
frame_kwargs = {}
frameseq = [Frame(data, **frame_kwargs) for data in datas]
return FrameSeq(frameseq=frameseq, **kwargs)
|