File size: 4,995 Bytes
6755a2d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
from __future__ import annotations

from typing import Any, Dict, List
import numpy as np

from ...data import Item, Items
from ...utils.util import convert_class_attr_to_dict

from .vision_object import Objects
from .shot_size import cal_shot_size_by_face


# Data structure definition; see Frame in VideoMashup/videomashup/data_structure/vision_data_structure.py
class Frame(Item):
    """Annotation record for one video frame: detected objects plus labels."""

    def __init__(
        self,
        frame_idx: int,
        objs: Objects = None,
        scene: str = None,
        caption: str = None,
        shot_size: str = None,
        shot_composition: str = None,
        camera_angle: str = None,
        field_depth: str = None,
        content_width=None,
        content_height=None,
        **kwargs,
    ) -> None:
        """Store the frame attributes and derive the shot size when possible.

        Args:
            frame_idx (int): Frame index.
            objs (Objects, optional): Detected objects. Anything that is not
                already an ``Objects`` instance (including None) is wrapped
                with ``Objects(objs)``. Defaults to None.
            scene (str, optional): Scene label, e.g. sky, airport.
                Defaults to None.
            caption (str, optional): Text description. Defaults to None.
            shot_size (str, optional): Shot size. Computed by ``preprocess``
                when omitted and both content dimensions are given.
                Defaults to None.
            shot_composition (str, optional): Composition. Defaults to None.
            camera_angle (str, optional): Camera angle. Defaults to None.
            field_depth (str, optional): Depth of field. Defaults to None.
            content_width (optional): Passed as ``frame_width`` when the shot
                size is computed. Defaults to None.
            content_height (optional): Passed as ``frame_height`` when the
                shot size is computed. Defaults to None.
            **kwargs: Extra attributes written straight into the instance
                ``__dict__`` (for example ``fps``, which ``timestamp`` reads).
        """
        self.frame_idx = frame_idx
        self.objs = objs if isinstance(objs, Objects) else Objects(objs)
        self.scene = scene
        self.caption = caption
        self.shot_size = shot_size
        self.shot_composition = shot_composition
        self.camera_angle = camera_angle
        self.field_depth = field_depth
        self.content_height = content_height
        self.content_width = content_width
        # Applied after the named attributes, so kwargs may override them.
        self.__dict__.update(**kwargs)
        self.preprocess()

    def preprocess(self):
        """Fill in ``shot_size`` when it is missing and both dimensions are set."""
        if (
            self.shot_size is None
            and self.content_height is not None
            and self.content_width is not None
        ):
            self.shot_size = self.cal_shot_size()

    def cal_shot_size(
        self,
    ):
        """Compute the shot size of this frame; currently based on face info.

        Returns:
            str: Shot size label, see
                VideoMashup/videomashup/data_structure/vision_data_structure.py.
                ``"ExtremeWideShot"`` when the frame has no detected objects.
        """
        if len(self.objs.objs) > 0:
            # Only the object with the largest bbox drives the classification.
            obj = self.objs.get_max_bbox_obj()
            shot_size = cal_shot_size_by_face(
                frame_width=self.content_width,
                frame_height=self.content_height,
                obj=obj,
            )
        else:
            shot_size = "ExtremeWideShot"
        return shot_size

    @property
    def timestamp(self):
        """Return ``frame_idx / fps``.

        ``fps`` is not assigned by this class's ``__init__``; it has to be
        supplied through ``**kwargs`` or set on the instance beforehand,
        otherwise the attribute lookup fails.
        """
        timestamp = self.frame_idx / self.fps
        return timestamp

    def to_dct(self, target_keys: List[str] = None, ignored_keys: List[str] = None):
        """Convert the frame to a dict, serializing ``objs`` via its own ``to_dct``.

        Args:
            target_keys (List[str], optional): Forwarded unchanged to the base
                class ``to_dct``. Defaults to None.
            ignored_keys (List[str], optional): Keys to leave out. They are
                forwarded to the base class ``to_dct``; if ``"objs"`` is among
                them the serialized objects are omitted as well.
                Defaults to None.

        Returns:
            dict: Base-class dict plus an ``"objs"`` entry (unless ignored).
        """
        # Copy so the caller's list is never mutated.
        base_ignored = list(ignored_keys) if ignored_keys is not None else []
        skip_objs = "objs" in base_ignored
        if not skip_objs:
            # "objs" is always withheld from the generic conversion because it
            # is serialized separately below.
            base_ignored.append("objs")
        dct = super().to_dct(target_keys, ignored_keys=base_ignored)
        if not skip_objs:
            dct["objs"] = self.objs.to_dct()
        return dct


def get_width_center_by_topkrole(
    objs: dict,
    coord_offset=None,
) -> float:
    """Compute a horizontal center suitable for cropping from the top role's boxes.

    The role whose id is numerically smallest is selected, its frames are
    ordered numerically, and the bbox of the middle frame is used.

    Args:
        objs (dict): Mapping of role id -> role info. Role ids must be
            convertible with ``int``. Each role info has a ``"bbox"`` mapping
            of frame key -> sequence whose first element is the box
            ``[x1, y1, x2, y2]``; frame keys must be convertible with ``int``.
        coord_offset (list, optional): ``[x, y]`` offset between the original
            video coordinates and the detection coordinates; subtracted from
            the box. Only the x component affects the result.
            Defaults to None, meaning no offset.

    Returns:
        float: X coordinate of the center of the selected bbox.

    Raises:
        ValueError: If ``objs`` is empty.
        IndexError: If the selected role has no bbox entries.
    """
    if coord_offset is None:
        coord_offset = [0, 0]
    offset_x = coord_offset[0]
    # Look the role up by its original key instead of round-tripping through
    # str(int(key)), which breaks for int keys or strings such as "01".
    top_role_key = min(objs, key=int)
    frame_boxes = objs[top_role_key]["bbox"]
    # Numeric order: a plain string sort would place "10" before "2".
    ordered_frames = sorted(frame_boxes, key=int)
    middle_frame = ordered_frames[len(ordered_frames) // 2]
    target_bbox = frame_boxes[middle_frame][0]
    left = target_bbox[0] - offset_x
    right = target_bbox[2] - offset_x
    return (left + right) / 2


def get_time_center_by_topkrole(
    objs: dict,
) -> float:
    """Compute the temporal center of the main role.

    Intended for cutting a temporal sub-clip around the main role instead of
    the default "expand from the middle" strategy. The role whose id is
    numerically smallest is selected and the mean of its bbox keys
    (converted with ``int``) is returned.

    Args:
        objs (dict): Mapping of role id -> role info. Role ids must be
            convertible with ``int``. Each role info has a ``"bbox"`` mapping
            whose keys are convertible with ``int``.

    Returns:
        float: Mean of the integer-converted bbox keys of the selected role.

    Raises:
        ValueError: If ``objs`` is empty.
    """
    # Look the role up by its original key instead of round-tripping through
    # str(int(key)), which breaks for int keys or strings such as "01".
    top_role_key = min(objs, key=int)
    frame_idxs = [int(key) for key in objs[top_role_key]["bbox"]]
    return np.mean(frame_idxs)


class FrameSeq(Items):
    """Sequence container for ``Frame`` items."""

    def __init__(self, frameseq: Any = None, **kwargs):
        """Wrap a frame sequence.

        Args:
            frameseq (Any, optional): Passed to the base-class constructor.
                Defaults to None.
            **kwargs: Extra attributes written straight into the instance
                ``__dict__``.
        """
        super().__init__(frameseq)
        # Alias for the ``data`` attribute read after the base constructor runs.
        self.frameseq = self.data
        self.__dict__.update(**kwargs)

    @classmethod
    def from_data(
        cls, datas: List[Frame], frame_kwargs: Dict = None, **kwargs
    ) -> FrameSeq:
        """Build a sequence by constructing one ``Frame`` per element of ``datas``.

        Args:
            datas (List[Frame]): Source elements. Each one is passed as the
                first positional argument of ``Frame`` (its ``frame_idx``).
                NOTE(review): the ``List[Frame]`` annotation does not match
                that usage - confirm the expected element type with callers.
            frame_kwargs (Dict, optional): Keyword arguments shared by every
                constructed ``Frame``. Defaults to None, meaning none.
            **kwargs: Forwarded to the sequence constructor.

        Returns:
            FrameSeq: A new instance of the class the method was called on.
        """
        if frame_kwargs is None:
            frame_kwargs = {}
        frameseq = [Frame(data, **frame_kwargs) for data in datas]
        # Use cls so subclasses get an instance of their own type.
        return cls(frameseq=frameseq, **kwargs)