Initial YakPanel commit

This commit is contained in:
Niranjan
2026-04-07 02:04:22 +05:30
commit 2826d3e7f3
5359 changed files with 1390724 additions and 0 deletions

View File

@@ -0,0 +1,23 @@
from typing import List
from .base import GPUBase
from .nvidia import NVIDIA
from .amd import AMD
class Driver:
    """Facade that discovers and holds every supported GPU backend."""

    # Kept as a class-level default for backward compatibility; each
    # instance now gets its own list in __init__.  BUG FIX: the original
    # appended to this shared class-level list, so creating a second
    # Driver duplicated every backend for all instances.
    drivers: List[GPUBase] = []

    def __init__(self):
        # Per-instance backend list; probe each vendor and keep the ones
        # that report support on this host.
        self.drivers = []
        if NVIDIA.is_support():
            self.drivers.append(NVIDIA())
        if AMD.is_support():
            self.drivers.append(AMD())

    @property
    def support(self):
        """True when at least one GPU backend was detected."""
        return len(self.drivers) > 0

    def get_all_device_info(self, get):
        """Placeholder: intended to aggregate device info from every backend.

        Args:
            get: request payload (unused in this stub).
        """
        for _driver in self.drivers:
            pass

View File

@@ -0,0 +1,36 @@
from mod.project.docker.app.gpu.base import GPUBase
class AMD(GPUBase):
    """Placeholder AMD GPU backend; no metric collection is implemented yet."""

    @classmethod
    def is_support(cls):
        """Report whether an AMD GPU stack is usable on this host.

        Not implemented.  Returns False explicitly; the original stub
        returned None implicitly, which callers treated as falsy anyway,
        so this is backward compatible but unambiguous.
        """
        return False

    def _get_device_version(self, *args, **kwargs):
        """Stub: driver/runtime version info."""
        pass

    def _get_device_name(self, *args, **kwargs):
        """Stub: device marketing name."""
        pass

    def _get_fan_info(self, *args, **kwargs):
        """Stub: fan speed info."""
        pass

    def main(self):
        """Stub entry point."""
        pass

    def get_info(self, gpu_id=0):
        """Stub: metrics for a single GPU.

        Args:
            gpu_id: device index, defaults to 0.
        """
        pass

    def _get_mem_info(self):
        """Stub: VRAM usage."""
        pass

    def _get_clock_info(self):
        """Stub: clock frequencies."""
        pass

    def _get_temp_info(self):
        """Stub: temperature."""
        pass

    def _get_uti_info(self):
        """Stub: utilization."""
        pass

    def _get_proc_uti(self, proc_name='', proc_pid=0):
        """Stub: per-process utilization.

        Args:
            proc_name: process name filter (unused).
            proc_pid: process id filter (unused).
        """
        pass

View File

@@ -0,0 +1,70 @@
from abc import ABC, abstractmethod
class GPUBase(ABC):
name = 'base'
support = None
@abstractmethod
def _get_mem_info(self, *args, **kwargs):
"""
获取显存占用
Returns:
"""
pass
@abstractmethod
def _get_clock_info(self, *args, **kwargs):
"""
获取时钟信息
Returns:
"""
pass
@abstractmethod
def _get_temp_info(self, *args, **kwargs):
"""
获取温度
Returns:
"""
pass
@abstractmethod
def _get_uti_info(self, *args, **kwargs):
"""
获取占用
Returns:
"""
pass
@abstractmethod
def _get_proc_uti(self, *args, **kwargs):
"""
获取进程占用
Returns:
"""
pass
@abstractmethod
def _get_fan_info(self, *args, **kwargs):
pass
@abstractmethod
def _get_device_name(self, *args, **kwargs):
pass
@abstractmethod
def _get_device_version(self, *args, **kwargs):
pass
@classmethod
@abstractmethod
def is_support(cls):
pass

View File

@@ -0,0 +1,27 @@
from dataclasses import dataclass
@dataclass
@dataclass
class CMD:
    """Namespace of shell command constants for Docker GPU integration."""

    @dataclass
    class CTK:
        """Commands for installing the NVIDIA Container Toolkit (CTK)."""

        @dataclass
        class APT:
            """Debian/Ubuntu installation steps."""
            GetGPGKey = "curl -fsSL https://nvidia.github.io/libnvidia-container/gpgkey | sudo gpg --dearmor -o /usr/share/keyrings/nvidia-container-toolkit-keyring.gpg"
            AddSourcesList = "curl -s -L https://nvidia.github.io/libnvidia-container/stable/deb/nvidia-container-toolkit.list | sed 's#deb https://#deb [signed-by=/usr/share/keyrings/nvidia-container-toolkit-keyring.gpg] https://#g' | sudo tee /etc/apt/sources.list.d/nvidia-container-toolkit.list"
            APTUpdate = "sudo apt-get update"
            Install = "sudo apt-get install -y nvidia-container-toolkit"
            # All four steps chained into one shell invocation.
            OneInstall = ';'.join((GetGPGKey, AddSourcesList, APTUpdate, Install))

        @dataclass
        class YUM:
            """RHEL/CentOS installation steps."""
            AddRepo = "curl -s -L https://nvidia.github.io/libnvidia-container/stable/rpm/nvidia-container-toolkit.repo | sudo tee /etc/yum.repos.d/nvidia-container-toolkit.repo"
            Install = "sudo yum install -y nvidia-container-toolkit"
            # Both steps chained into one shell invocation.
            OneInstall = ';'.join((AddRepo, Install))

        @dataclass
        class ConfigureDocker:
            """Post-install Docker runtime configuration."""
            Runtime = "sudo nvidia-ctk runtime configure --runtime=docker"
            Restart = "sudo systemctl restart docker"

        # Probe used to verify the toolkit is installed.
        CheckVersion = "nvidia-ctk -v"

View File

@@ -0,0 +1,199 @@
import sys
from collections import defaultdict
from functools import wraps
if "/www/server/panel/class" not in sys.path:
sys.path.append('/www/server/panel/class')
import public
try:
import pynvml
except:
public.ExecShell("btpip install nvidia-ml-py")
import pynvml
try:
from mod.project.docker.app.gpu.base import GPUBase
except:
class GPUBase:
pass
# Registries mapping task name -> collector function.  Plain dicts: the
# original used defaultdict() with no default_factory, which behaves
# exactly like dict but misleadingly suggests missing keys are tolerated.
device_tasks = {}
system_tasks = {}


def register_task(name: str):
    """Decorator factory that registers a metric-collector method.

    Args:
        name: "<scope>:<task>" where scope is 'device' (tasks that take a
            per-GPU handle) or 'system' (driver-wide tasks).

    Returns:
        A decorator that records the undecorated function in the matching
        registry and returns a transparent wrapper around it.
    """
    def task_decorator(task_func):
        task_type, task_name = name.split(':')
        if task_type == 'device':
            device_tasks[task_name] = task_func
        elif task_type == 'system':
            system_tasks[task_name] = task_func

        @wraps(task_func)
        def func_wrapper(*args, **kwargs):
            return task_func(*args, **kwargs)
        return func_wrapper
    return task_decorator
class NVIDIA(GPUBase):
    """NVIDIA GPU backend built on pynvml (nvidia-ml-py).

    Metric collectors register themselves into ``device_tasks`` /
    ``system_tasks`` via ``register_task`` and are executed generically
    by ``get_info_by_index`` / ``get_system_info``.
    """
    name = 'nvidia'
    support = None

    def __init__(self):
        # Probe support; is_support() also initializes the NVML library
        # on success, so nvmlDeviceGetCount() is safe to call after it.
        self.device_count = 0
        if self.is_support():
            self.device_count = pynvml.nvmlDeviceGetCount()

    def __del__(self):
        # NOTE(review): is_support() re-runs nvmlInit(), so this pairs a
        # fresh init with the shutdown; behavior preserved as-is.
        if self.is_support():
            pynvml.nvmlShutdown()

    def get_all_device_info(self):
        """Return {'system': {...}, 0: {...}, 1: {...}, ...} for all GPUs."""
        all_info = defaultdict()
        all_info['system'] = self.get_system_info()
        for index in range(self.device_count):
            all_info[index] = self.get_info_by_index(index)
        return all_info

    def get_info_by_index(self, index=0):
        """Run every registered per-device task against GPU ``index``.

        A failing task yields None for its key instead of aborting the sweep.
        """
        info = defaultdict()
        handle = pynvml.nvmlDeviceGetHandleByIndex(index)
        for t_name, t_func in device_tasks.items():
            try:
                info[t_name] = t_func(self, handle)
            except Exception:
                # Best-effort: record the failure as None and continue.
                info[t_name] = None
        return info

    def get_system_info(self):
        """Run every registered system-wide task; failures become None."""
        info = defaultdict()
        for t_name, t_func in system_tasks.items():
            try:
                info[t_name] = t_func(self)
            except Exception:
                info[t_name] = None
        return info

    @classmethod
    def is_support(cls):
        """Initialize NVML; True when an NVIDIA driver is present."""
        try:
            pynvml.nvmlInit()
            cls.support = True
            return True
        except pynvml.NVMLError:
            cls.support = False
            return False

    @register_task('device:memory')
    def _get_mem_info(self, handle):
        """VRAM total/free/used in GiB.

        Single NVML query; the original issued the same query three times.
        """
        mem = pynvml.nvmlDeviceGetMemoryInfo(handle)
        info = defaultdict()
        info['size'] = int(mem.total) / 1024 ** 3
        info['free'] = int(mem.free) / 1024 ** 3
        info['used'] = int(mem.used) / 1024 ** 3
        return info

    @register_task('device:clock')
    def _get_clock_info(self, handle):
        """Current clocks (MHz) for the graphics/SM/memory/video domains."""
        info = defaultdict()
        info['graphics'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_GRAPHICS)
        info['sm'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_SM)
        info['memory'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_MEM)
        info['video'] = pynvml.nvmlDeviceGetClockInfo(handle, pynvml.NVML_CLOCK_VIDEO)
        return info

    @register_task('device:temperature')
    def _get_temp_info(self, handle):
        """GPU core temperature; falls back to the V1 API on old bindings.

        BUG FIX: ``except pynvml.NVMLError or AttributeError`` evaluated
        the ``or`` first and only ever caught NVMLError; an AttributeError
        (API missing from the binding) escaped.  Now both are caught.
        """
        try:
            return pynvml.nvmlDeviceGetTemperature(handle, pynvml.NVML_TEMPERATURE_GPU)
        except (pynvml.NVMLError, AttributeError):
            return pynvml.nvmlDeviceGetTemperatureV1(handle, pynvml.NVML_TEMPERATURE_GPU)

    @register_task('device:utilization')
    def _get_uti_info(self, handle):
        """GPU and memory utilization percentages (single NVML query)."""
        rates = pynvml.nvmlDeviceGetUtilizationRates(handle)
        info = defaultdict()
        info['gpu'] = rates.gpu
        info['memory'] = rates.memory
        return info

    @register_task('device:processes')
    def _get_proc_uti(self, handle):
        """List compute/graphics/MPS processes with name and type attached."""
        info = list()
        sources = (
            (pynvml.nvmlDeviceGetComputeRunningProcesses, 'Compute'),
            (pynvml.nvmlDeviceGetGraphicsRunningProcesses, 'Graphics'),
            (pynvml.nvmlDeviceGetMPSComputeRunningProcesses, 'MPS'),
        )
        for query, proc_type in sources:
            for p in query(handle):
                p.__dict__['name'] = pynvml.nvmlSystemGetProcessName(p.pid)
                p.__dict__['type'] = proc_type
                info.append(p.__dict__)
        return info

    @register_task('device:fan')
    def _get_fan_info(self, handle):
        """Fan speed, trying progressively older NVML APIs; 0 on failure.

        Note: only the FIRST call is guarded by the except chain; if a
        fallback itself raises, it propagates (original behavior kept).
        """
        info = defaultdict()
        try:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeedRPM(handle).speed
        except AttributeError:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeed(handle)
        except pynvml.NVMLError:
            info['speed'] = pynvml.nvmlDeviceGetFanSpeed_v2(handle, 0)
        except Exception:
            info['speed'] = 0
        return info

    @register_task('device:name')
    def _get_device_name(self, handle):
        """Marketing name of the GPU."""
        return pynvml.nvmlDeviceGetName(handle)

    @register_task('device:power')
    def _get_device_power(self, handle):
        """Current power draw and management limit, in milliwatts."""
        info = defaultdict()
        info['current'] = pynvml.nvmlDeviceGetPowerUsage(handle)
        info['max'] = pynvml.nvmlDeviceGetPowerManagementLimit(handle)
        return info

    @register_task('system:version')
    def _get_device_version(self):
        """Driver and CUDA driver versions.

        BUG FIX: same ``except A or B`` defect as _get_temp_info; an
        AttributeError from a missing binding now triggers the _v2 fallback.
        """
        info = defaultdict()
        info['driver'] = pynvml.nvmlSystemGetDriverVersion()
        try:
            info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion()
        except (pynvml.NVMLError, AttributeError):
            info['cuda'] = pynvml.nvmlSystemGetCudaDriverVersion_v2()
        return info

    @register_task('system:count')
    def _get_device_count(self):
        """Number of NVIDIA devices visible to the driver."""
        return pynvml.nvmlDeviceGetCount()
if __name__ == '__main__':
    # Smoke test: dump every GPU's metrics when run as a script.
    driver = NVIDIA()
    print(driver.get_all_device_info())

View File

@@ -0,0 +1,158 @@
import os
import sys
from typing import Tuple
from mod.project.docker.app.gpu.constants import CMD
from mod.project.docker.app.gpu.nvidia import NVIDIA
if "/www/server/panel/class" not in sys.path:
sys.path.append('/www/server/panel/class')
import public
class GPUTool:
    """Helpers for exposing GPU options to Docker apps and installing the
    NVIDIA Container Toolkit (CTK)."""

    # Cached results of __gpu_default_setting (computed once per process).
    gpu_option = None
    option_default = None

    @staticmethod
    def __get_linux_distribution():
        """Classify the host as the 'debian' or 'centos' family.

        Returns:
            'debian' or 'centos', or None when the distribution cannot be
            classified from /etc/os-release or the legacy marker files.

        Raises:
            ValueError: when probing fails for an unexpected reason.
        """
        try:
            # Prefer /etc/os-release, the systemd-standard metadata file.
            with open("/etc/os-release", "r", encoding="utf-8") as f:
                os_release = {}
                for line in f:
                    line = line.strip()
                    if line and "=" in line:
                        key, value = line.split("=", 1)
                        os_release[key] = value.strip('"')
            dist_id = os_release.get("ID", "").lower()
            id_like = os_release.get("ID_LIKE", "").lower()
            # Classify by ID first, then by the ID_LIKE ancestry hint.
            if dist_id in ["debian", "ubuntu"]:
                return "debian"
            elif dist_id in ["centos", "rhel", "fedora"]:
                return "centos"
            elif "debian" in id_like:
                return "debian"
            elif "rhel" in id_like or "fedora" in id_like:
                return "centos"
        except FileNotFoundError:
            # /etc/os-release is missing: fall back to legacy marker files.
            if os.path.exists("/etc/debian_version"):
                return "debian"
            elif os.path.exists("/etc/redhat-release"):
                return "centos"
        except Exception:
            raise ValueError("System Distribution Is Unknown")

    @classmethod
    def __gpu_default_setting(cls) -> Tuple[bool, bool]:
        """Decide whether to offer the GPU option and its default state.

        Returns:
            gpu_option: whether the GPU checkbox is offered at all.
            option_default: whether it is ticked by default (only when the
                summed VRAM across devices exceeds 3 GiB).
        """
        if cls.gpu_option is not None and cls.option_default is not None:
            return cls.gpu_option, cls.option_default
        driver = NVIDIA()
        # No NVIDIA support: hide the option entirely.
        if driver.support is None or driver.support is False:
            cls.gpu_option = False
            cls.option_default = False
            return cls.gpu_option, cls.option_default
        # Supported: sum VRAM across devices to pick the default.
        device_info = driver.get_all_device_info()
        mem_size = 0
        for _, _device in device_info.items():
            # ``or {}`` guards against a device whose memory task failed
            # and stored None (the original crashed on None.get).
            mem_size = mem_size + (_device.get('memory') or {}).get('size', 0)
        cls.gpu_option = True
        cls.option_default = mem_size > 3
        return cls.gpu_option, cls.option_default

    @classmethod
    def register_app_gpu_option(cls, app):
        """Inject the GPU default into an app template, or strip the GPU
        field entirely when GPUs are unsupported.

        Args:
            app: app template dict with an optional 'field' list.

        Returns:
            The (mutated) app dict.
        """
        option, default = cls.__gpu_default_setting()
        # BUG FIX: iterate over a copy — the original removed items from
        # the list while iterating it, which skips the element following
        # each removal.
        for field in list(app.get('field', [])):
            if not option and field.get('attr', '') == 'gpu':
                app['field'].remove(field)
            elif option and field.get('attr', '') == 'gpu':
                field['default'] = default
                field['suffix'] = field['suffix'] + ' | 已默认设置为{}'.format(default)
        return app

    @staticmethod
    def is_install_ctk():
        """Check whether nvidia-ctk is installed by probing its version.

        Returns:
            True when the toolkit responds with a version string.
        """
        stdout, stderr = public.ExecShell(CMD.CTK.CheckVersion)
        if len(stderr) != 0:
            return False
        # BUG FIX: the original used ``not stdout.lower().find('version')``,
        # which is True when 'version' sits at index 0 (found!) and False
        # when absent (find returns -1, which is truthy) — the exact
        # opposite of the intent.
        if 'version' not in stdout.lower():
            public.print_log("Not Nvidia Container Toolkit")
            return False
        return True

    @classmethod
    def __ctk_install_cmd_apt(cls, app_log):
        """Build the apt-based CTK install pipeline, logging to app_log."""
        return ("{get_gpg_key} >> {app_log};"
                "{add_sources_list} >> {app_log};"
                "{apt_update} >> {app_log};"
                "{install} >> {app_log}"
                .format(get_gpg_key=CMD.CTK.APT.GetGPGKey,
                        add_sources_list=CMD.CTK.APT.AddSourcesList,
                        apt_update=CMD.CTK.APT.APTUpdate,
                        install=CMD.CTK.APT.Install,
                        app_log=app_log
                        ))

    @classmethod
    def __ctk_install_cmd_yum(cls, app_log):
        """Build the yum-based CTK install pipeline, logging to app_log."""
        return ("{add_repo} >> {app_log};"
                "{install} >> {app_log}"
                .format(add_repo=CMD.CTK.YUM.AddRepo,
                        install=CMD.CTK.YUM.Install,
                        app_log=app_log
                        ))

    @classmethod
    def __config_docker(cls, app_log):
        """Build the Docker-runtime configuration commands, logging to app_log."""
        return ("{runtime} >> {app_log};"
                "{restart} >> {app_log}"
                .format(runtime=CMD.CTK.ConfigureDocker.Runtime,
                        restart=CMD.CTK.ConfigureDocker.Restart,
                        app_log=app_log))

    @classmethod
    def ctk_install_cmd(cls, app_log):
        """Build the full install + configure command for this distro.

        Args:
            app_log: path the shell output is appended to.

        Returns:
            The combined shell command, or '' for an unknown distribution.
        """
        dtb = cls.__get_linux_distribution()
        cmd = ''
        if dtb == 'debian':
            cmd = (
                "{install_cmd};"
                "{config_docker}"
                .format(
                    install_cmd=cls.__ctk_install_cmd_apt(app_log),
                    config_docker=cls.__config_docker(app_log),
                ))
        elif dtb == 'centos':
            cmd = (
                "{install_cmd};"
                "{config_docker}"
                .format(
                    install_cmd=cls.__ctk_install_cmd_yum(app_log),
                    config_docker=cls.__config_docker(app_log),
                ))
        return cmd

View File