MuseV-test / mmcm /utils /gpu_util.py
kevinwang676's picture
Upload folder using huggingface_hub
6755a2d verified
raw
history blame
3.12 kB
from typing import Union, List, Dict, Tuple, Literal
import logging
def convert_byte_unit(
    value: float,
    src_unit: Literal["b", "B", "KB", "MB", "GB", "TB"],
    target_unit: Literal["b", "B", "KB", "MB", "GB", "TB"],
) -> float:
    """Convert a data size from one unit to another.

    The value is first normalised to bytes and then re-expressed in the
    target unit. Multi-byte units are binary multiples (1 KB = 1024 B,
    1 MB = 1024**2 B, and so on); one byte is 8 bits.

    Args:
        value: Amount expressed in ``src_unit``.
        src_unit: Unit of ``value``. One of "b" (bit), "B" (byte), "KB",
            "MB", "GB", "TB". The spellings "bit" and "Byte" are accepted too.
        target_unit: Unit to convert to; same choices as ``src_unit``.

    Raises:
        ValueError: If ``src_unit`` is not a recognised unit.
        ValueError: If ``target_unit`` is not a recognised unit.

    Returns:
        ``value`` expressed in ``target_unit``. Conversions that only
        multiply (e.g. "KB" -> "B") keep the numeric type of ``value``.
    """
    # Ordered so that the unit at position k equals 1024 ** (k + 1) bytes.
    multi_byte_units = ("KB", "MB", "GB", "TB")

    # Step 1: express the input in bytes.
    if src_unit in ("b", "bit"):
        num_bytes = value / 8
    elif src_unit in ("B", "Byte"):
        num_bytes = value
    elif src_unit in multi_byte_units:
        num_bytes = value * 1024 ** (multi_byte_units.index(src_unit) + 1)
    else:
        raise ValueError("src_unit is not valid")

    # Step 2: express the byte count in the requested unit.
    if target_unit in ("b", "bit"):
        return num_bytes * 8
    if target_unit in ("B", "Byte"):
        return num_bytes
    if target_unit in multi_byte_units:
        return num_bytes / 1024 ** (multi_byte_units.index(target_unit) + 1)
    raise ValueError("target_unit is not valid")
def get_gpu_status(unit="MB") -> List[Dict]:
    """Collect memory and utilization statistics for every GPU visible to NVML.

    This is a best-effort query: any exception raised while talking to NVML
    is printed/logged rather than propagated, and whatever was collected up
    to that point is returned (possibly an empty list). Only a failure to
    import ``pynvml`` itself propagates to the caller.

    Args:
        unit: Unit used for the ``*_memory`` fields; passed to
            ``convert_byte_unit`` as ``target_unit``. Defaults to "MB".

    Returns:
        One dict per GPU, in device-index order, with the keys:
        ``gpu_name``, ``total_memory``, ``used_memory``,
        ``used_memory_ratio`` (used / total), ``gpu_utilization``
        (``utilization.gpu`` as reported by NVML), ``free_memory_ratio``
        (free / total) and ``free_memory``.
    """
    import pynvml

    infos = []
    # Tracks whether nvmlInit() succeeded so that shutdown is only attempted
    # when there is actually something to release.
    nvml_initialized = False
    try:
        # Initialise NVML.
        pynvml.nvmlInit()
        nvml_initialized = True
        # Number of GPUs.
        device_count = pynvml.nvmlDeviceGetCount()
        # Gather the information of each GPU.
        for i in range(device_count):
            handle = pynvml.nvmlDeviceGetHandleByIndex(i)
            info = pynvml.nvmlDeviceGetMemoryInfo(handle)
            utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
            gpu_name = pynvml.nvmlDeviceGetName(handle)
            gpu_info = {
                "gpu_name": gpu_name,
                "total_memory": convert_byte_unit(
                    info.total, src_unit="B", target_unit=unit
                ),
                "used_memory": convert_byte_unit(
                    info.used, src_unit="B", target_unit=unit
                ),
                "used_memory_ratio": info.used / info.total,
                "gpu_utilization": utilization.gpu,
                "free_memory_ratio": info.free / info.total,
                "free_memory": convert_byte_unit(
                    info.free, src_unit="B", target_unit=unit
                ),
            }
            infos.append(gpu_info)
    except Exception as e:
        print("get_gpu_status failed")
        logging.exception(e)
    finally:
        # Release NVML even when a query above failed; previously shutdown
        # was skipped on any error after a successful init.
        if nvml_initialized:
            try:
                pynvml.nvmlShutdown()
            except Exception as e:
                print("get_gpu_status failed")
                logging.exception(e)
    return infos