Initial YakPanel commit
This commit is contained in:
199
mod/project/docker/app/gpu/nvidia.py
Normal file
199
mod/project/docker/app/gpu/nvidia.py
Normal file
@@ -0,0 +1,199 @@
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from functools import wraps
|
||||
|
||||
if "/www/server/panel/class" not in sys.path:
|
||||
sys.path.append('/www/server/panel/class')
|
||||
|
||||
import public
|
||||
|
||||
try:
|
||||
import pynvml
|
||||
except:
|
||||
public.ExecShell("btpip install nvidia-ml-py")
|
||||
import pynvml
|
||||
|
||||
try:
|
||||
from mod.project.docker.app.gpu.base import GPUBase
|
||||
except:
|
||||
class GPUBase:
|
||||
pass
|
||||
|
||||
device_tasks = defaultdict()
|
||||
system_tasks = defaultdict()
|
||||
|
||||
|
||||
def register_task(name: str):
|
||||
def task_decorator(task_func):
|
||||
_task_type, _task_name = name.split(':')
|
||||
if _task_type == 'device':
|
||||
device_tasks[_task_name] = task_func
|
||||
elif _task_type == 'system':
|
||||
system_tasks[_task_name] = task_func
|
||||
|
||||
@wraps(task_func)
|
||||
def func_wrapper(*args, **kwargs):
|
||||
return task_func(*args, **kwargs)
|
||||
|
||||
return func_wrapper
|
||||
|
||||
return task_decorator
|
||||
|
||||
|
||||
class NVIDIA(GPUBase):
|
||||
name = 'nvidia'
|
||||
support = None
|
||||
|
||||
def __init__(self):
|
||||
# 判断是否支持,并在判断时初始化pynvml库。
|
||||
self.device_count = 0
|
||||
if self.is_support():
|
||||
self.device_count = pynvml.nvmlDeviceGetCount()
|
||||
|
||||
def __del__(self):
|
||||
if self.is_support():
|
||||
pynvml.nvmlShutdown()
|
||||
|
||||
def get_all_device_info(self):
|
||||
all_info = defaultdict()
|
||||
all_info['system'] = self.get_system_info()
|
||||
for index in range(self.device_count):
|
||||
all_info[index] = self.get_info_by_index(index)
|
||||
return all_info
|
||||
|
||||
def get_info_by_index(self, index=0):
|
||||
info = defaultdict()
|
||||
handle = pynvml.nvmlDeviceGetHandleByIndex(index)
|
||||
|
||||
for t_name, t_func in device_tasks.items():
|
||||
try:
|
||||
info[t_name] = t_func(self, handle)
|
||||
except:
|
||||
# public.print_log("pynvml {t_name} error: {}")
|
||||
info[t_name] = None
|
||||
|
||||
return info
|
||||
|
||||
def get_system_info(self):
|
||||
info = defaultdict()
|
||||
for t_name, t_func in system_tasks.items():
|
||||
try:
|
||||
info[t_name] = t_func(self)
|
||||
except:
|
||||
# public.print_log(f"pynvml {t_name} error: {e}")
|
||||
info[t_name] = None
|
||||
return info
|
||||
|
||||
@classmethod
|
||||
def is_support(cls):
|
||||
try:
|
||||
pynvml.nvmlInit()
|
||||
cls.support = True
|
||||
return True
|
||||
|
||||
except pynvml.NVMLError:
|
||||
cls.support = False
|
||||
# public.print_log("Nvidia was not supported!")
|
||||
return False
|
||||
|
||||
@register_task('device:memory')
|
||||
def _get_mem_info(self, handle):
|
||||
info = defaultdict()
|
||||
info['size'] = int(pynvml.nvmlDeviceGetMemoryInfo(handle).total) / 1024 ** 3
|
||||
info['free'] = int(pynvml.nvmlDeviceGetMemoryInfo(handle).free) / 1024 ** 3
|
||||
info['used'] = int(pynvml.nvmlDeviceGetMemoryInfo(handle).used) / 1024 ** 3
|
||||
return info
|
||||
|
||||
@register_task('device:clock')
|
||||
def _get_clock_info(self, handle):
|
||||
info = defaultdict()
|
||||
info['graphics'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_GRAPHICS)
|
||||
info['sm'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_SM)
|
||||
info['memory'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM)
|
||||
info['video'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_VIDEO)
|
||||
|
||||
return info
|
||||
|
||||
@register_task('device:temperature')
|
||||
def _get_temp_info(self, handle):
|
||||
info = 0
|
||||
try:
|
||||
info = pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
|
||||
except pynvml.NVMLError or AttributeError:
|
||||
info = pynvml.nvmlDeviceGetTemperatureV1(handle, pynvml.NVML_TEMPERATURE_GPU)
|
||||
return info
|
||||
|
||||
@register_task('device:utilization')
|
||||
def _get_uti_info(self, handle):
|
||||
info = defaultdict()
|
||||
info['gpu'] = pynvml.nvmlDeviceGetUtilizationRates(handle).gpu
|
||||
info['memory'] = pynvml.nvmlDeviceGetUtilizationRates(handle).memory
|
||||
|
||||
return info
|
||||
|
||||
@register_task('device:processes')
|
||||
def _get_proc_uti(self, handle):
|
||||
info = list()
|
||||
for p in pynvml.nvmlDeviceGetComputeRunningProcesses(handle):
|
||||
p.__dict__['name'] = pynvml.nvmlSystemGetProcessName(p.pid)
|
||||
p.__dict__['type'] = 'Compute'
|
||||
info.append(p.__dict__)
|
||||
|
||||
for p in pynvml.nvmlDeviceGetGraphicsRunningProcesses(handle):
|
||||
p.__dict__['name'] = pynvml.nvmlSystemGetProcessName(p.pid)
|
||||
p.__dict__['type'] = 'Graphics'
|
||||
info.append(p.__dict__)
|
||||
|
||||
for p in pynvml.nvmlDeviceGetMPSComputeRunningProcesses(handle):
|
||||
p.__dict__['name'] = pynvml.nvmlSystemGetProcessName(p.pid)
|
||||
p.__dict__['type'] = 'MPS'
|
||||
info.append(p.__dict__)
|
||||
|
||||
return info
|
||||
|
||||
@register_task('device:fan')
|
||||
def _get_fan_info(self, handle):
|
||||
info = defaultdict()
|
||||
try:
|
||||
info['speed'] = pynvml.nvmlDeviceGetFanSpeedRPM(handle).speed
|
||||
except AttributeError:
|
||||
info['speed'] = pynvml.nvmlDeviceGetFanSpeed(handle)
|
||||
except pynvml.NVMLError:
|
||||
info['speed'] = pynvml.nvmlDeviceGetFanSpeed_v2(handle, 0)
|
||||
except:
|
||||
info['speed'] = 0
|
||||
return info
|
||||
|
||||
@register_task('device:name')
|
||||
def _get_device_name(self, handle):
|
||||
return pynvml.nvmlDeviceGetName(handle)
|
||||
|
||||
@register_task('device:power')
|
||||
def _get_device_power(self, handle):
|
||||
info = defaultdict()
|
||||
info['current'] = pynvml.nvmlDeviceGetPowerUsage(handle)
|
||||
info['max'] = pynvml.nvmlDeviceGetPowerManagementLimit(handle)
|
||||
return info
|
||||
|
||||
@register_task('system:version')
|
||||
def _get_device_version(self):
|
||||
info = defaultdict()
|
||||
info['driver'] = pynvml.nvmlSystemGetDriverVersion()
|
||||
|
||||
try:
|
||||
info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion()
|
||||
except pynvml.NVMLError or AttributeError:
|
||||
info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion_v2()
|
||||
|
||||
return info
|
||||
|
||||
@register_task('system:count')
|
||||
def _get_device_count(self):
|
||||
info = 0
|
||||
info = pynvml.nvmlDeviceGetCount()
|
||||
return info
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
nvidia = NVIDIA()
|
||||
print(nvidia.get_all_device_info())
|
||||
Reference in New Issue
Block a user