from typing import List, Optional, Union
import logging

import h5py
import numpy as np

from .emb import MediaMapEmb

logger = logging.getLogger(__name__)  # pylint: disable=invalid-name

__all__ = ["H5pyMediaMapEmb", "save_value_with_h5py"]


def save_value_with_h5py(
    path: str,
    value: Union[np.ndarray, None],
    key: str,
    idx: Optional[Union[int, List[int]]] = None,
    dtype=None,
    shape=None,
    overwrite: bool = False,
):
    """Write ``value`` into dataset ``key`` of the HDF5 file at ``path``.

    The file is opened in append mode, so it is created when missing and
    datasets stored under other keys are left alone.

    Args:
        path: Path of the HDF5 file.
        value: Array to store. ``None`` only allocates (or re-validates) the
            dataset without writing data; ``dtype`` and ``shape`` are then
            mandatory.
        key: Name of the dataset inside the file.
        idx: Index (or list of indices) of the dataset to write to. When
            ``None`` the whole dataset is assigned.
        dtype: Dtype of the dataset to create; defaults to ``value.dtype``.
        shape: Shape of the dataset to create; defaults to ``value.shape``.
        overwrite: Delete and recreate an existing dataset unconditionally.

    Raises:
        ValueError: If ``value`` is ``None`` and ``dtype`` or ``shape`` is
            not given, because the dataset layout cannot be inferred.
    """
    if value is None and (dtype is None or shape is None):
        raise ValueError(
            "dtype and shape must both be given when value is None"
        )
    if dtype is None:
        dtype = value.dtype
    if shape is None:
        shape = value.shape

    # Normalised form used only for comparison with the stored shape; the
    # caller's original ``shape`` object is what gets passed to h5py.
    if isinstance(shape, (int, np.integer)):
        target_shape = (int(shape),)
    else:
        target_shape = tuple(shape)

    with h5py.File(path, "a") as f:
        if key in f:
            stored = f[key]
            stale = overwrite
            # Variable-length string datasets are exempt from the shape
            # check. For everything else compare against the *target* shape
            # rather than ``value.shape``: an indexed write passes a slice
            # that is smaller than the dataset, and comparing against the
            # slice would wipe the dataset on every partial write.
            if stored.dtype != h5py.special_dtype(vlen=str):
                if stored.shape != target_shape:
                    stale = True
            if stale:
                del f[key]

        if key not in f:
            f.create_dataset(key, shape=shape, dtype=dtype)

        if value is None:
            # Allocation-only call: nothing to write.
            return

        if idx is None:
            f[key][...] = value
        else:
            f[key][idx] = value


class H5pyMediaMapEmb(MediaMapEmb):
    """Embedding store that reads and writes datasets of one HDF5 file.

    The file handle stays open for the lifetime of the object; release it
    with :meth:`close` or by using the instance as a context manager.
    """

    def __init__(self, path: str) -> None:
        """Open (or create) the HDF5 file backing this embedding store.

        Intended layout of the stored embeddings::

            OfflineEmb = {
                "overall_algo": Emb,           # embedding of the whole file

                # multi-aspect embeddings of the whole file
                "theme": np.array,             # theme
                "emotion_algo": np.array,      # emotion
                "semantic_algo": np.array,     # semantics

                "clips_overall_algo": np.array,    # n_clip x clip_emb
                "clips_emotion_algo": np.array,    # n_clip x clip_emb
                "clips_semantic_algo": np.array,   # n_clip x clip_emb
                "clips_theme_algo": np.array,      # n_clip x clip_emb

                "scenes_overall_algo": np.array,   # n_scenes x scene_emb
                "scenes_emotion_algo": np.array,   # n_scenes x scene_emb
                "scenes_semantic_algo": np.array,  # n_scenes x scene_emb
                "scenes_theme_algo": np.array,     # n_scenes x scene_emb

                # Segments may come from transition splitting, MusicStage,
                # etc.; "clips" currently holds transition-split segments.
                # If another kind of segmentation is needed later, a
                # "stage" field can be added at the same level as "clips".

                "frames_overall_algo": np.array,   # n_frames x frame_emb
                "frames_emotion_algo": np.array,   # n_frames x frame_emb
                "frames_semantic_algo": np.array,  # n_frames x frame_emb
                "frames_theme_algo": np.array,     # n_frames x frame_emb

                "frames_objs_algo": {
                    "frame_id_algo": {
                        # "overall_algo": np.array,  n_objs x obj_emb
                        "emotion_algo": np.array,   # n_objs x obj_emb
                        "semantic_algo": np.array,  # n_objs x obj_emb
                        "theme_algo": np.array,     # n_objs x obj_emb
                    }
                },

                "roles_algo": {
                    "roleid": np.array,            # n x obj_emb
                },
            }

        Args:
            path (str): Path of the HDF5 file used for storage.
        """
        super().__init__(path)
        # Opened in append mode: the file is created if it does not exist
        # and existing content is kept.
        self.f = h5py.File(path, "a")

    def __enter__(self) -> "H5pyMediaMapEmb":
        """Return the instance so it can be used in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        """Close the underlying file when leaving the ``with`` block."""
        self.close()

    def _keys_index(self, key):
        """Join ``key`` (a single item or a list) into a "/"-separated path.

        ``None`` entries are dropped and every remaining entry is converted
        with ``str``.
        """
        parts = key if isinstance(key, list) else [key]
        return "/".join(str(part) for part in parts if part is not None)

    def get_value(self, key, idx=None):
        """Return the dataset at ``key`` (or its ``idx`` selection) as an array.

        Args:
            key: Dataset name, or list of path components (see
                :meth:`_keys_index`).
            idx: Optional index applied to the dataset before conversion.

        Returns:
            np.ndarray: A copy of the selected data.
        """
        dataset = self.f[self._keys_index(key)]
        selected = dataset if idx is None else dataset[idx]
        return np.array(selected)

    def set_value(self, key, value, idx=None):
        """Store ``value`` at ``key``, creating the dataset when missing.

        A new dataset takes its shape and dtype from ``value``.

        Args:
            key: Dataset name, or list of path components (see
                :meth:`_keys_index`).
            value: Array to write; must expose ``shape`` and ``dtype``
                whenever the dataset has to be created.
            idx: Optional index of the dataset to write to. When ``None``
                the whole dataset is assigned.
        """
        new_key = self._keys_index(key)
        if new_key not in self.f:
            self.f.create_dataset(
                new_key, shape=value.shape, dtype=value.dtype
            )
        if idx is None:
            self.f[new_key][...] = value
        else:
            self.f[new_key][idx] = value

    def close(self):
        """Close the underlying HDF5 file."""
        self.f.close()


class H5pyMediaMapEmbProxy(H5pyMediaMapEmb):
    """Subclass of :class:`H5pyMediaMapEmb` that adds no behaviour of its own."""