Spaces:
No application file
No application file
File size: 3,119 Bytes
6755a2d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 |
from typing import Union, List, Dict, Tuple, Literal
import logging
def convert_byte_unit(
    value: float,
    src_unit: Literal["b", "B", "KB", "MB", "GB", "TB"],
    target_unit: Literal["b", "B", "KB", "MB", "GB", "TB"],
) -> float:
    """Convert a data size from one unit to another.

    The value is first normalised to bytes and then rescaled to the
    requested unit. Multiples are binary: each step between KB, MB, GB and
    TB is a factor of 1024. ``"b"`` is bits (8 per byte) and ``"B"`` is
    bytes; the spellings ``"bit"`` and ``"Byte"`` are accepted as aliases.

    Args:
        value: Size expressed in ``src_unit``.
        src_unit: Unit that ``value`` is currently expressed in.
        target_unit: Unit to convert ``value`` to.

    Raises:
        ValueError: If ``src_unit`` is not a recognised unit.
        ValueError: If ``target_unit`` is not a recognised unit.

    Returns:
        The size expressed in ``target_unit``.
    """
    bit_units = ("b", "bit")
    byte_units = ("B", "Byte")
    # A unit's position in this tuple, plus one, is its power of 1024.
    scaled_units = ("KB", "MB", "GB", "TB")

    # Step 1: normalise the input to bytes.
    if src_unit in bit_units:
        num_bytes = value / 8
    elif src_unit in byte_units:
        num_bytes = value
    elif src_unit in scaled_units:
        num_bytes = value * 1024 ** (scaled_units.index(src_unit) + 1)
    else:
        raise ValueError("src_unit is not valid")

    # Step 2: rescale from bytes to the requested unit.
    if target_unit in bit_units:
        return num_bytes * 8
    if target_unit in byte_units:
        return num_bytes
    if target_unit in scaled_units:
        return num_bytes / 1024 ** (scaled_units.index(target_unit) + 1)
    raise ValueError("target_unit is not valid")
def get_gpu_status(unit="MB") -> List[Dict]:
    """Collect memory and utilization information for every GPU seen by NVML.

    This is a best-effort query: any exception raised while talking to NVML
    (or while converting units) is printed and logged, and whatever was
    collected up to that point is returned. Only a failure to import
    ``pynvml`` propagates to the caller.

    Args:
        unit: Unit used for the ``total_memory``, ``used_memory`` and
            ``free_memory`` fields; passed to ``convert_byte_unit`` as
            ``target_unit``. An unsupported unit makes the conversion raise,
            which is caught here, so the result is an empty list.

    Returns:
        One dict per GPU, in device-index order, with the keys
        ``gpu_name``, ``total_memory``, ``used_memory``,
        ``used_memory_ratio``, ``gpu_utilization``, ``free_memory_ratio``
        and ``free_memory``. The ratio fields are fractions of total memory.
    """
    import pynvml

    infos = []
    try:
        # Initialise NVML; it must be shut down again once we are done.
        pynvml.nvmlInit()
        try:
            # Query each GPU in turn.
            for index in range(pynvml.nvmlDeviceGetCount()):
                handle = pynvml.nvmlDeviceGetHandleByIndex(index)
                memory = pynvml.nvmlDeviceGetMemoryInfo(handle)
                utilization = pynvml.nvmlDeviceGetUtilizationRates(handle)
                gpu_name = pynvml.nvmlDeviceGetName(handle)
                infos.append(
                    {
                        "gpu_name": gpu_name,
                        "total_memory": convert_byte_unit(
                            memory.total, src_unit="B", target_unit=unit
                        ),
                        "used_memory": convert_byte_unit(
                            memory.used, src_unit="B", target_unit=unit
                        ),
                        "used_memory_ratio": memory.used / memory.total,
                        "gpu_utilization": utilization.gpu,
                        "free_memory_ratio": memory.free / memory.total,
                        "free_memory": convert_byte_unit(
                            memory.free, src_unit="B", target_unit=unit
                        ),
                    }
                )
        finally:
            # Release NVML even if a query above failed. This runs only
            # after a successful nvmlInit().
            pynvml.nvmlShutdown()
    except Exception as e:
        print("get_gpu_status failed")
        logging.exception(e)
    return infos
|