Initial YakPanel commit
This commit is contained in:
3
mod/project/node/task_flow/__init__.py
Normal file
3
mod/project/node/task_flow/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from .file_task import self_file_running_log, file_task_run_sync
|
||||
from .command_task import command_task_run_sync
|
||||
from .flow import flow_running_log, flow_useful_version
|
||||
195
mod/project/node/task_flow/command_task.py
Normal file
195
mod/project/node/task_flow/command_task.py
Normal file
@@ -0,0 +1,195 @@
|
||||
import json
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
import traceback
|
||||
from typing import List, Dict, Callable, Any, Union
|
||||
|
||||
from mod.base.ssh_executor import SSHExecutor
|
||||
from mod.project.node.dbutil import ServerNodeDB, CommandTask, CommandLog, TaskFlowsDB
|
||||
|
||||
|
||||
class CMDTask(object):
    """Runs a command task's script on its target nodes over SSH.

    One thread per node executes the script (``run_one``); every CommandLog
    status change is pushed to ``end_queue`` where a single writer thread
    (``end_func``) batches DB updates (at most every 0.5s) and notifies
    ``call_update`` with the progress ``status_dict``.

    Log status codes used here: 1=running, 2=complete, 3=failed to start /
    internal error, 4=script exited with a non-zero code.
    """

    def __init__(self, task: Union[int, CommandTask], log_id: int, call_update: Callable[[Any], None], exclude_nodes: List[int] = None):
        """
        :param task: a CommandTask instance or its primary-key id.
        :param log_id: 0 to run every CommandLog of the task, otherwise the
            id of a single CommandLog to (re)run.
        :param call_update: callback receiving the progress ``status_dict``.
        :param exclude_nodes: server ids whose logs are skipped entirely.
        :raises ValueError: when *task* is neither an int nor a CommandTask.
        :raises RuntimeError: when the task or its logs cannot be found.
        """
        self._edb = TaskFlowsDB()
        if isinstance(task, int):
            self.task = self._edb.CommandTask.find("id = ?", (task,))
        elif isinstance(task, CommandTask):
            self.task = task
        else:
            raise ValueError("Task parameter error")
        if not self.task:
            raise RuntimeError("The specified task does not exist")
        if log_id == 0:
            self.task.elogs = self._edb.CommandLog.query("command_task_id = ? ", (self.task.id,))
        else:
            self.task.elogs = [self._edb.CommandLog.find("command_task_id = ? AND id = ?", (self.task.id, log_id))]
        if not self.task.elogs:
            raise RuntimeError("Task has no execution entry")

        self._exclude_nodes = exclude_nodes or []
        self.task.elogs = [x for x in self.task.elogs if x.server_id not in self._exclude_nodes]

        # Mark the task as running before any node starts.
        self.task.status = 1
        self._edb.CommandTask.update(self.task)
        self.end_queue = queue.Queue()
        self.end_status = False  # set True once all worker threads joined
        self.status: List[Dict] = []
        self.call_update = call_update
        self.status_dict: Dict[str, Union[List[Any], int]] = {
            "task_id": self.task.id,
            "task_type": "command",
            "flow_idx": self.task.step_index - 1,
            "count": len(self.task.elogs),
            "complete": 0,
            "error": 0,
            "exclude_nodes": self._exclude_nodes,
            "error_nodes": [],
            "data": [],
        }

    def end_func(self):
        """Drain ``end_queue`` until ``end_status`` is set and the queue is
        empty, batching CommandLog DB writes and progress callbacks."""
        edb = TaskFlowsDB()
        tmp_dict: Dict[int, CommandLog] = {}
        last_time = time.time()
        update_fields = ("status",)
        # Sets deduplicate repeated events for the same log id.
        complete_set, error_set = set(), set()
        while True:
            try:
                elog: CommandLog = self.end_queue.get(timeout=0.1)
            except queue.Empty:
                if self.end_status:
                    break
                else:
                    continue
            except Exception as e:
                print(e)
                break

            if elog.status in (3, 4):
                error_set.add(elog.id)
                self.status_dict["error_nodes"].append(int(elog.server_id))
                self.status_dict["error"] = len(error_set)
            elif elog.status == 2:
                complete_set.add(elog.id)
                self.status_dict["complete"] = len(complete_set)

            tmp_dict[elog.id] = elog
            if time.time() - last_time > 0.5:
                edb.CommandLog.bath_update(tmp_dict.values(), update_fields=update_fields)
                self.status_dict["data"] = [l.to_show_data() for l in tmp_dict.values()]
                self.call_update(self.status_dict)
                tmp_dict.clear()
                # Fix: reset the batch timer after flushing, otherwise every
                # event after the first flush triggers an immediate DB write
                # (mirrors event_func in file_task.py).
                last_time = time.time()

        # Final flush of whatever arrived after the last timed flush.
        if tmp_dict:
            edb.CommandLog.bath_update(tmp_dict.values(), update_fields=update_fields)
            self.status_dict["data"] = [l.to_show_data() for l in tmp_dict.values()]
            self.call_update(self.status_dict)

        return

    def start(self):
        """Run the script on every selected node, wait for completion, then
        persist the final task status (2=complete, 3=had errors)."""
        thread_list = []
        s_db = ServerNodeDB()
        end_th = threading.Thread(target=self.end_func)
        end_th.start()

        for (idx, log) in enumerate(self.task.elogs):
            log.log_idx = idx
            if log.status == 2:  # skip logs that already completed
                self.end_queue.put(log)
                continue

            log.status = 1
            ssh_conf = None
            node = s_db.get_node_by_id(log.server_id)
            if not node:
                log.status = 3
                log.write_log("Node data loss, unable to execute\n")
            else:
                ssh_conf = json.loads(node["ssh_conf"])
                if not ssh_conf:
                    log.status = 3
                    log.write_log("Node SSH configuration data lost, unable to execute\n")

            # Publish the log's current state (running, or failed-to-start).
            self.end_queue.put(log)

            if not ssh_conf:
                continue

            thread = threading.Thread(target=self.run_one, args=(ssh_conf, log))
            thread.start()
            thread_list.append(thread)

        for i in thread_list:
            i.join()
        self.end_status = True
        end_th.join()
        if self.status_dict["error"] > 0:
            self.task.status = 3
        else:
            self.task.status = 2
        self._edb.CommandTask.update(self.task)
        self._edb.close()

    def run_one(self, ssh_conf: dict, elog: CommandLog):
        """Execute the task's script on one node over SSH, streaming output
        into *elog* and pushing the final status to ``end_queue``."""
        ssh = SSHExecutor(
            host=ssh_conf["host"],
            port=ssh_conf["port"],
            username=ssh_conf["username"],
            password=ssh_conf["password"],
            key_data=ssh_conf["pkey"],
            passphrase=ssh_conf["pkey_passwd"])
        elog.write_log("Start executing the task\nStart establishing SSH connection...\n")
        try:
            ssh.open()

            def on_stdout(data):
                if isinstance(data, bytes):
                    data = data.decode()
                elog.write_log(data)

            elog.write_log("Start executing script...\n\n")
            t = time.time()
            res_code = ssh.execute_script_streaming(
                script_content=self.task.script_content,
                script_type=self.task.script_type,
                timeout=60*60,  # hard cap: one hour per node
                on_stdout=on_stdout,
                on_stderr=on_stdout  # stderr is merged into the same log
            )
            take_time = round((time.time() - t) * 1000, 2)
            elog.write_log("\n\nExecution completed, time-consuming [{}ms]\n".format(take_time))
            if res_code == 0:
                elog.status = 2
                elog.write_log("Mission accomplished\n", is_end_log=True)
            else:
                elog.status = 4
                elog.write_log("Task exception, return status code is:{}\n".format(res_code), is_end_log=True)
            self.end_queue.put(elog)
        except Exception as e:
            traceback.print_exc()
            elog.status = 3
            elog.write_log("\nTask failed, error:" + str(e), is_end_log=True)
            self.end_queue.put(elog)
        return
|
||||
|
||||
|
||||
# Synchronously retry a failed command-task log entry.
def command_task_run_sync(task_id: int, log_id: int) -> Union[str, Dict[str, Any]]:
    """Re-run a single failed CommandLog of a command task.

    Returns an error message string when validation fails, otherwise the
    final progress dict of the re-run.
    """
    db = TaskFlowsDB()
    task = db.CommandTask.get_byid(task_id)
    if not task:
        return "Task does not exist"

    log = db.CommandLog.get_byid(log_id)
    if not log:
        return "Subtask does not exist"

    # Only logs in a failed state (3 or 4) are eligible for a retry.
    if log.status not in (3, 4):
        return "Subtask status is not failed, unable to retry"
    if log.command_task_id != task_id:
        return "The subtask does not belong to this task and cannot be retried"

    runner = CMDTask(task, log_id=log_id, call_update=print)
    runner.start()
    return runner.status_dict
|
||||
|
||||
484
mod/project/node/task_flow/file_task.py
Normal file
484
mod/project/node/task_flow/file_task.py
Normal file
@@ -0,0 +1,484 @@
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
import traceback
|
||||
import itertools
|
||||
from datetime import datetime
|
||||
from typing import List, Dict, Callable, Any, Tuple, Union, Optional
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
from mod.base.ssh_executor import SSHExecutor
|
||||
from mod.project.node.dbutil import ServerNodeDB, TaskFlowsDB, TransferTask, TransferFile, TransferLog
|
||||
from mod.project.node.nodeutil import ServerNode, LPanelNode, SSHApi
|
||||
from mod.project.node.filetransfer.socket_server import StatusServer, StatusClient, register_cleanup
|
||||
|
||||
|
||||
_SOCKET_FILE_DIR = "/tmp/flow_task"
|
||||
if not os.path.exists(_SOCKET_FILE_DIR):
|
||||
os.mkdir(_SOCKET_FILE_DIR)
|
||||
|
||||
def _dir_walk(path: str) -> Tuple[List[dict], str]:
|
||||
if not os.path.isdir(path):
|
||||
return [], "{} Not a directory".format(path)
|
||||
res_file = []
|
||||
count = 0
|
||||
empty_dir = []
|
||||
for root, dirs, files in os.walk(path):
|
||||
if not files:
|
||||
empty_dir.append(root)
|
||||
for f in files:
|
||||
count += 1
|
||||
try:
|
||||
res_file.append({
|
||||
"path": os.path.join(root, f),
|
||||
"size": os.path.getsize(os.path.join(root, f)),
|
||||
"is_dir": 0
|
||||
})
|
||||
except:
|
||||
pass
|
||||
return [{"path": d, "size": 0, "is_dir": 1} for d in empty_dir] + res_file, ""
|
||||
|
||||
|
||||
class FiletransferTask(object):
    """Transfers the local files/directories of a TransferTask to its
    destination nodes.

    Worker threads (``once_trans``) pull per-file TransferLog jobs from
    ``trans_queue``; every status/progress change is pushed to
    ``event_queue`` where a single writer thread (``event_func``) batches
    DB updates (at most every 0.5s) and notifies ``call_update``.

    Log status codes used here: 0=pending, 1=running, 2=complete,
    3=failed, 4=skipped (target already exists).
    """

    def __init__(self, task: Union[int, TransferTask],
                 call_update: Callable[[Any], None],
                 exclude_nodes: List[int] = None,
                 the_log_id: int = None,
                 ):
        """
        :param task: a TransferTask instance or its primary-key id.
        :param call_update: callback receiving the progress ``status_dict``.
        :param exclude_nodes: destination node indexes to skip.
        :param the_log_id: when > 0, only this TransferLog is (re)run.
        :raises ValueError: when *task* is neither an int nor a TransferTask.
        :raises RuntimeError: when the task cannot be found.
        """
        self._fdb = TaskFlowsDB()
        if isinstance(task, int):
            self.task = self._fdb.TransferTask.get_byid(task)
        elif isinstance(task, TransferTask):
            self.task = task
        else:
            raise ValueError("Parameter exception")

        if not self.task:
            raise RuntimeError("Task does not exist")

        self.exclude_nodes = exclude_nodes or []
        # Negative/None log ids are normalised to 0 (meaning "all logs").
        self.the_log_id = max(the_log_id, 0) if isinstance(the_log_id, int) else 0
        self.event_queue = queue.Queue()
        self.trans_queue = queue.Queue()
        self.mut = threading.Lock()  # guards lazy creation of _srv_cache entries
        self._srv_cache: Dict[int, Union[SSHApi, LPanelNode, ServerNode]] = {}
        self.status_dict: Dict[str, Any] = {
            "task_id": self.task.id,
            "task_type": "file",
            "flow_idx": self.task.step_index - 1,
            "count": 0,
            "complete": 0,
            "error": 0,
            "error_nodes": [],
            "exclude_nodes": self.exclude_nodes,
            "data": None,
        }
        self.is_trans_end = False  # signals event_func to drain and exit
        self.call_update = call_update

    def _init_files(self):
        """Build the TransferFile rows for this task (idempotent).

        Walks every entry of ``task.path_list`` and records the files and
        empty directories to transfer together with their destination paths.
        Does nothing when rows already exist for this task.
        """
        has_file = self._fdb.TransferFile.find("flow_id = ? AND transfer_task_id = ?", (self.task.flow_id, self.task.id))
        if has_file:  # already initialised on a previous run
            return

        file_list = []
        for src_item in self.task.path_list:
            dst_path = src_item["dst_path"].rstrip("/")
            src_item["path"] = src_item["path"].rstrip("/")
            if not os.path.exists(src_item["path"]):
                continue  # silently skip sources that vanished
            src_item["is_dir"] = os.path.isdir(src_item["path"])
            if src_item["is_dir"]:
                f_list, err = _dir_walk(src_item["path"])
                if not f_list:
                    # Walk produced nothing: ship the directory itself.
                    src_item["dst_file"] = os.path.join(dst_path, os.path.basename(src_item["path"]))
                    # Fix: "size" was never set on this path and raised
                    # KeyError when building the TransferFile row below.
                    src_item.setdefault("size", 0)
                    file_list.append(src_item)
                else:
                    for f_item in f_list:
                        # Rebase each walked path under the destination dir.
                        f_item["dst_file"] = f_item["path"].replace(os.path.dirname(src_item["path"]), dst_path)
                    file_list.extend(f_list)
            else:
                if not os.path.isfile(src_item["path"]):
                    continue
                src_item["dst_file"] = os.path.join(dst_path, os.path.basename(src_item["path"]))
                src_item["size"] = os.path.getsize(src_item["path"])
                file_list.append(src_item)

        t_list = []
        for f_item in file_list:
            fl = TransferFile(
                flow_id=self.task.flow_id,
                transfer_task_id=self.task.id,
                src_file=f_item["path"],
                dst_file=f_item["dst_file"],
                file_size=f_item["size"],
                is_dir=f_item["is_dir"],
            )
            t_list.append(fl)
        try:
            self._fdb.TransferFile.create(t_list)
        except Exception:  # fix: was a bare except (also caught KeyboardInterrupt)
            print("Failed to initialize file list", traceback.format_exc())

    def _init_files_log(self):
        """Create one TransferLog per (file, destination node) pair (idempotent)."""
        tf_list = self._fdb.TransferFile.query("flow_id = ? AND transfer_task_id = ?", (self.task.flow_id, self.task.id))
        if not tf_list:
            return []
        # If logs exist for the first file they exist for all of them.
        has_fl = self._fdb.TransferLog.query("transfer_task_id = ? AND transfer_file_id = ?", (self.task.id, tf_list[0].id))
        if has_fl:
            return self._fdb.TransferLog.query("transfer_task_id = ?", (self.task.id,))

        fl_list = []
        for (tf, dst_node_id) in itertools.product(tf_list, self.task.dst_nodes.keys()):
            fl = TransferLog(
                flow_id=self.task.flow_id,
                transfer_task_id=self.task.id,
                transfer_file_id=tf.id,
                dst_node_idx=int(dst_node_id),
                status=0,
                progress=0,
                message=""
            )
            fl_list.append(fl)

        try:
            self._fdb.TransferLog.create(fl_list)
        except Exception:  # fix: was a bare except
            print("Failed to initialize file list", traceback.format_exc())

    def _get_srv(self, idx: int) -> Union[SSHApi, LPanelNode, ServerNode]:
        """Return (and cache) the client for destination node *idx*.

        The cache is re-checked under the lock so concurrent workers build
        each client at most once.

        :raises RuntimeError: when *idx* is not a destination of this task.
        """
        idx = int(idx)
        if idx in self._srv_cache:
            return self._srv_cache[idx]
        with self.mut:
            if idx in self._srv_cache:
                return self._srv_cache[idx]
            if idx not in self.task.dst_nodes:
                raise RuntimeError("Node index is out of range")
            srv_data: dict = self.task.dst_nodes[idx]
            if srv_data.get("lpver", None):
                srv = LPanelNode(srv_data["address"], srv_data["api_key"], srv_data["lpver"])
            elif srv_data["api_key"] or srv_data["app_key"]:
                srv = ServerNode(srv_data["address"], srv_data["api_key"], srv_data["app_key"])
            else:
                # Threading mode: use different sessions of the same SSH
                # connection from different threads.
                srv_data["ssh_conf"]["threading_mod"] = True
                srv = SSHApi(**srv_data["ssh_conf"])
            self._srv_cache[idx] = srv
            return srv

    def start(self):
        """Run the transfer for every pending log, wait for completion, then
        persist the final task status (2=complete, 3=had errors)."""
        self.task.status = 1
        self._fdb.TransferTask.update(self.task)
        self._init_files()
        self._init_files_log()
        if self.the_log_id > 0:  # retrying one specific log
            query_where = "transfer_task_id = ? and id = ?"
            files_logs = self._fdb.TransferLog.query(query_where, (self.task.id, self.the_log_id))
        else:
            if self.exclude_nodes:
                # Only unfinished logs, excluding the failed nodes.
                query_where = "transfer_task_id = ? and status not in (2, 4) and dst_node_idx not in ({})".format(
                    ",".join(["?"] * len(self.exclude_nodes))
                )
            else:
                query_where = "transfer_task_id = ? and status not in (2, 4)"
            files_logs = self._fdb.TransferLog.query(query_where, (self.task.id, *self.exclude_nodes))
        files_list = self._fdb.TransferFile.query("transfer_task_id = ?", (self.task.id,))
        if not files_logs:
            # NOTE(review): returning here leaves task.status at 1 (running)
            # in the DB — confirm whether that is intentional.
            return
        files_map = {fl.id: fl for fl in files_list}
        for (idx, fl) in enumerate(files_logs):
            fl.log_idx = idx
            fl.tf = files_map[fl.transfer_file_id]
            self.trans_queue.put(fl)

        self.status_dict["count"] = len(files_logs)

        th_event = threading.Thread(target=self.event_func,)
        th_event.start()

        with ThreadPoolExecutor(max_workers=8) as executor:
            # Fix: a second loop used to submit 8 extra once_trans calls
            # without the required worker_id argument; each raised TypeError
            # that was silently swallowed because those futures were dropped.
            futures = [executor.submit(self.once_trans, worker_id) for worker_id in range(8)]
            for future in as_completed(futures):
                print("Completed result:", future.result())

        self.is_trans_end = True
        th_event.join()
        if self.the_log_id > 0:
            # Retry of one log: the task is complete only when no unfinished
            # (0/1) or failed (3) logs remain. Fix: the branch was inverted
            # (count == 0 marked the task as failed), contradicting the
            # non-retry branch below.
            if self._fdb.TransferLog.count("transfer_task_id = ? and status in (0,1,3)", (self.task.id, )) == 0:
                self.task.status = 2
            else:
                self.task.status = 3
        else:
            self.task.status = 2 if self.status_dict["error"] == 0 else 3
        self._fdb.TransferTask.update(self.task)
        self._fdb.close()

    def once_trans(self, worker_id: int):
        """Worker loop: pop TransferLog jobs from ``trans_queue`` and perform
        one transfer each (directory creation or file upload), reporting
        every state change to ``event_queue``."""
        while True:
            try:
                tl = self.trans_queue.get(block=False)
            except queue.Empty:
                print("worker_id: %s, The queue is empty" % worker_id)
                break
            except Exception as e:
                print("worker_id: %s, Failed to obtain task" % worker_id)
                print(traceback.format_exc())
                break

            # Perform one file transfer.
            try:
                if tl.status == 2:  # skip files that already completed
                    self.event_queue.put(tl)
                    continue
                srv = self._get_srv(tl.dst_node_idx)
                if tl.tf.is_dir:  # handle an empty directory
                    exits, err = self.target_file_exits_check(srv, tl) if False else srv.target_file_exits(tl.tf.dst_file)
                    if err:  # failed to query the target's state
                        tl.message = err
                        tl.status = 3
                        self.event_queue.put(tl)
                    elif exits:  # target already exists: mark skipped
                        tl.status = 4
                        tl.progress = 100
                        self.event_queue.put(tl)
                    else:  # target missing: create the directory
                        res, err = srv.create_dir(tl.tf.dst_file)
                        if err:
                            tl.message = err
                            tl.status = 3
                        elif isinstance(res, dict):
                            if res["status"]:
                                tl.status = 2
                                tl.message = ""
                                tl.progress = 100
                            else:
                                tl.message = res["msg"]
                                tl.status = 3
                        else:
                            tl.status = 2
                            tl.message = ""
                            tl.progress = 100

                        self.event_queue.put(tl)
                else:  # handle a file upload
                    tl.status = 1
                    self.event_queue.put(tl)

                    def _call_log(progress, log):
                        # Stream per-file progress into the event pipeline.
                        tl.progress = progress
                        self.event_queue.put(tl)

                    err = srv.upload_file(
                        filename=tl.tf.src_file,
                        target_path=os.path.dirname(tl.tf.dst_file),
                        mode="cover",
                        call_log=_call_log)

                    if err:
                        tl.status = 3
                        tl.message = err
                    else:
                        tl.status = 2
                        tl.message = ""
                        tl.progress = 100

                    self.event_queue.put(tl)
            except Exception as e:
                err = traceback.format_exc()
                tl.status = 3
                tl.message = str(e) + "\n" + err
                self.event_queue.put(tl)

    def event_func(self):
        """Drain ``event_queue`` until ``is_trans_end`` is set and the queue
        is empty, batching TransferLog DB writes and progress callbacks."""
        fdb = TaskFlowsDB()
        last_time = time.time()
        tmp_dict = {}
        update_fields = ("status", "message", "progress", "completed_at", "started_at")
        # Sets deduplicate repeated events for the same log id.
        complete_set, error_set = set(), set()
        error_node_set = set()
        while True:
            try:
                tl: TransferLog = self.event_queue.get(timeout=0.1)
            except queue.Empty:
                if self.is_trans_end:
                    break
                else:
                    continue
            except Exception as e:
                print(e)
                break
            if tl.status in (2, 4):
                complete_set.add(tl.id)
                self.status_dict["complete"] = len(complete_set)
                if not tl.started_at:
                    tl.started_at = tl.started_at or datetime.now()
                tl.completed_at = tl.completed_at or datetime.now()
            elif tl.status == 3:
                error_set.add(tl.id)
                self.status_dict["error"] = len(error_set)
                tl.completed_at = datetime.now()
                error_node_set.add(tl.dst_node_idx)
            elif tl.status == 1:
                tl.started_at = datetime.now()

            tmp_dict[tl.id] = tl
            if time.time() - last_time > 0.5:
                fdb.TransferLog.bath_update(tmp_dict.values(), update_fields=update_fields)
                last_time = time.time()

                self.status_dict["data"] = [i.to_show_data() for i in tmp_dict.values()]
                self.status_dict["error_nodes"] = list(error_node_set)
                self.call_update(self.status_dict)
                tmp_dict.clear()

        # Final flush of whatever arrived after the last timed flush.
        if tmp_dict:
            fdb.TransferLog.bath_update(tmp_dict.values(), update_fields=update_fields)
            self.status_dict["data"] = [i.to_show_data() for i in tmp_dict.values()]
            self.status_dict["error_nodes"] = list(error_node_set)
            self.call_update(self.status_dict)

        fdb.close()
|
||||
|
||||
|
||||
# Execute the file transfer on a remote source node and relay its status.
class NodeFiletransferTask(object):
    """Drives a transfer task whose source files live on a remote node.

    The remote node performs the actual transfer; this class proxies its
    JSON status stream (via ``proxy_transferfile_status``) into the local
    ``call_update`` callback and mirrors the final outcome onto the task row.
    """

    def __init__(self, task: TransferTask,
                 call_update: Callable[[Any], None],
                 exclude_nodes: List[int] = None,
                 the_log_id: int = None,
                 ):
        self.task = task
        src_node = task.src_node
        self.exclude_nodes = exclude_nodes or []
        self.srv = ServerNode(src_node["address"],src_node["api_key"], src_node["app_key"], src_node["name"])
        # Negative/None log ids are normalised to 0 (meaning "all logs").
        self.the_log_id = max(the_log_id, 0) if isinstance(the_log_id, int) else 0
        self.call_update = call_update
        # Fields merged into every status payload forwarded to call_update.
        self.default_status_data = {
            "task_id": self.task.id,
            "task_type": "file",
        }
        self.status_dict = dict()  # last status payload received from the node

    def start(self):
        """Run the remote transfer to completion and persist task.status
        (1=running, 2=complete, 3=failed)."""
        fdb = TaskFlowsDB()
        self.task.status = 1
        fdb.TransferTask.update(self.task)
        err = self.srv.proxy_transferfile_status(self.task.src_node_task_id, self.exclude_nodes, self.the_log_id, self.handle_proxy_data)
        if err:
            self.task.status = 3
            # NOTE(review): assumes task.message is always a str — confirm it
            # cannot be None here, otherwise this concatenation raises.
            self.task.message += ";" + err
        else:
            if self.status_dict and self.status_dict.get("error", 0):
                self.task.status = 3
            else:
                self.task.status = 2
            # Any message recorded by handle_proxy_data marks the task failed.
            if self.task.message:
                self.task.status = 3

        fdb.TransferTask.update(self.task)

    def handle_proxy_data(self, data):
        """Parse one JSON status payload from the remote node and forward a
        normalised progress dict to ``call_update``."""
        ret = {"count": 0,"complete": 0,"error": 0, "error_nodes":[], "data": []}
        try:
            data_dict = json.loads(data)
            if "type" not in data_dict:
                return

            if data_dict["type"] == "status":
                if "init" in data_dict["data"]:  # skip the initial handshake payload
                    return
                ret.update(data_dict["data"])
                ret.update(self.default_status_data)
            else:  # end / error state: carries history data or an error message
                if "data" in data_dict:
                    ret.update(data_dict["data"])
                    ret.update(self.default_status_data)
                elif "msg" in data_dict:
                    self.task.message = data_dict["msg"]
                    return
        except:
            print(traceback.format_exc())
            ret["data"].append({"message": "Data source node execution transmission exception, please check if the node is functioning properly"})
            ret.update(self.default_status_data)

        self.status_dict = ret
        self.call_update(ret)
|
||||
|
||||
|
||||
# Run the file transfer on this machine, reporting back to a remote node.
class SelfFiletransferTask(object):
    """Executes a transfer task locally while exposing its live status over
    a unix socket so a remote node can stream it."""

    def __init__(self, task_id: int, exclude_nodes: List[int] = None, the_log_id: int = None):
        socket_path = "{}/file_task_{}".format(_SOCKET_FILE_DIR, task_id)
        self.status_server = StatusServer(self.get_status, socket_path)
        self.f_task = FiletransferTask(task_id, self.update_status, exclude_nodes, the_log_id)

    @staticmethod
    def get_status( init: bool = False) -> Dict:
        # Initial handshake payload; real progress arrives via update_status.
        return {"init": True }

    def start_status_server(self):
        # Daemon thread: the status server must not keep the process alive.
        server_thread = threading.Thread(target=self.status_server.start_server, daemon=True)
        server_thread.start()
        register_cleanup(self.status_server)

    def update_status(self, update_data: Dict):
        # Forward every progress dict from the transfer onto the socket.
        self.status_server.update_status(update_data)

    def start(self):
        """Bring up the status socket, then run the transfer to completion."""
        self.start_status_server()
        self.f_task.start()
|
||||
|
||||
|
||||
def self_file_running_log(task_id: int, call_log: Callable[[Union[str,dict]], None], timeout:float = 3.0) -> str:
    """Attach to a locally running file task and stream its status updates.

    Waits up to *timeout* seconds for the task's status socket to appear;
    returns an error string on timeout, otherwise "" when the stream ends.
    """
    socket_file = "{}/file_task_{}".format(_SOCKET_FILE_DIR, task_id)
    poll_interval = 0.05
    while not os.path.exists(socket_file):
        if timeout <= 0:
            return "Task startup timeout"
        time.sleep(poll_interval)
        timeout -= poll_interval

    client = StatusClient(socket_file, callback=call_log)
    client.connect()
    client.wait_receive()  # blocks until the task's status stream closes
    return ""
|
||||
|
||||
|
||||
# Synchronously retry a file-transfer task (or one of its logs).
def file_task_run_sync(task_id: int, log_id: int) -> Union[str, Dict[str, Any]]:
    """Re-run a failed transfer log of a file task.

    Returns an error message string when validation fails, otherwise the
    final progress dict of the re-run.
    """
    db = TaskFlowsDB()
    task = db.TransferTask.get_byid(task_id)
    if not task:
        return "Task does not exist"

    # Remote-source task: delegate the whole retry to the source node.
    if task.src_node_task_id > 0:
        remote_runner = NodeFiletransferTask(task, print, exclude_nodes=[], the_log_id=log_id)
        remote_runner.start()
        return remote_runner.status_dict

    if not log_id:
        return "The log ID cannot be empty"

    log = db.TransferLog.get_byid(log_id)
    if not log:
        return "log does not exist"

    # Only failed logs (status 3) are eligible for a retry.
    if log.status != 3:
        return "The task status is not abnormal, no need to retry"

    if log.transfer_task_id != task_id:
        return "The log ID does not match the task ID"

    local_runner = FiletransferTask(task, print, exclude_nodes=[], the_log_id=log_id)
    local_runner.start()
    return local_runner.status_dict
|
||||
152
mod/project/node/task_flow/flow.py
Normal file
152
mod/project/node/task_flow/flow.py
Normal file
@@ -0,0 +1,152 @@
|
||||
import json
|
||||
import os
|
||||
import threading
|
||||
import queue
|
||||
import time
|
||||
import traceback
|
||||
from typing import List, Dict, Callable, Any, Union, Optional, Tuple
|
||||
|
||||
from mod.base.ssh_executor import SSHExecutor
|
||||
from mod.project.node.dbutil import ServerNodeDB, CommandTask, CommandLog, TaskFlowsDB, TransferTask
|
||||
from mod.project.node.dbutil import TaskFlowsDB
|
||||
from mod.project.node.nodeutil import LPanelNode, ServerNode, SSHApi
|
||||
from mod.project.node.filetransfer.socket_server import StatusServer, StatusClient, register_cleanup
|
||||
|
||||
from .command_task import CMDTask
|
||||
from .file_task import FiletransferTask, NodeFiletransferTask
|
||||
|
||||
_SOCKET_FILE_DIR = "/tmp/flow_task"
|
||||
if not os.path.exists(_SOCKET_FILE_DIR):
|
||||
os.mkdir(_SOCKET_FILE_DIR)
|
||||
|
||||
|
||||
|
||||
class FlowTask:
    """Executes all steps (command + file-transfer tasks) of one flow in
    ``step_index`` order, publishing progress over a unix status socket."""

    def __init__(self, flow_id: int, step_idx: int=0, sub_id: int=0):
        # NOTE(review): step_idx and sub_id are currently unused — confirm
        # whether partial-flow execution is still planned.
        self._fdb = TaskFlowsDB()
        self.flow = self._fdb.Flow.get_byid(flow_id)
        if not self.flow:
            raise RuntimeError("Task does not exist")

        # All step rows of this flow, gathered from both task tables.
        self.steps: List[Union[CommandTask, TransferTask]] = [
            *self._fdb.CommandTask.query("flow_id = ?", (flow_id,)),
            *self._fdb.TransferTask.query("flow_id = ?", (flow_id,))
        ]

        self.steps.sort(key=lambda x: x.step_index, reverse=False)

        if not self.steps:
            raise RuntimeError("The task content does not exist")
        self.now_idx = 1  # 1-based index of the step currently executing
        # Whether to keep executing later steps after any step fails.
        self.run_when_error = False
        if self.flow.strategy.get("run_when_error", False):
            self.run_when_error = True
        # Whether a node that failed in one step is skipped in later steps.
        self.exclude_when_error = True
        if not self.flow.strategy.get("exclude_when_error", True):
            self.exclude_when_error = False

        self.status_server = StatusServer(self.get_status, (_SOCKET_FILE_DIR + "/flow_task_" + str(flow_id)))
        # All node ids targeted by the flow ("|"-separated in flow.server_ids).
        self.flow_all_nodes = set([int(i) for i in self.flow.server_ids.split("|") if i and i.isdigit()])

    def get_status(self, init: bool = False):
        """Snapshot of the flow and its steps, served over the status socket."""
        flow_data = self.flow.to_dict()
        flow_data["steps"] = [x.to_show_data() for x in self.steps]
        flow_data["now_idx"] = self.now_idx
        return flow_data

    def start_status_server(self):
        # Daemon thread: the status server must not keep the process alive.
        t = threading.Thread(target=self.status_server.start_server, args=(), daemon=True)
        t.start()
        register_cleanup(self.status_server)

    def update_status(self, update_data: Dict):
        self.status_server.update_status(update_data)

    def _run(self) -> bool:
        """Execute every step in order; returns True when all succeeded."""
        def call_log(log_data):
            self.update_status(log_data)

        all_status = True  # stays True while every step so far succeeded
        error_nodes = set()  # nodes excluded from later steps after a failure
        for step in self.steps:
            if not (self.flow_all_nodes - error_nodes):  # no nodes left to run on
                # NOTE(review): this also skips the now_idx increment below —
                # confirm whether the displayed step index should still advance.
                continue
            if isinstance(step, CommandTask):
                if step.status != 2:  # skip steps that already completed
                    has_err, task_error_nodes = self.run_cmd_task(step, call_log, exclude_nodes=list(error_nodes))
                    all_status = all_status and not has_err
                    if has_err and not self.run_when_error:
                        return False
                    if self.exclude_when_error and task_error_nodes:
                        error_nodes.update(task_error_nodes)
            elif isinstance(step, TransferTask):
                if step.status != 2:  # skip steps that already completed
                    has_err, task_error_nodes = self.run_transfer_task(step, call_log, exclude_nodes=list(error_nodes))
                    all_status = all_status and not has_err
                    if has_err and not self.run_when_error:
                        return False
                    if self.exclude_when_error and task_error_nodes:
                        error_nodes.update(task_error_nodes)
            self.now_idx += 1
        return all_status

    def start(self):
        """Run the flow end to end and record its final status
        ("running" -> "complete"/"error")."""
        self.start_status_server()

        self.flow.status = "running"
        self._fdb.Flow.update(self.flow)
        all_status = self._run()
        self.flow.status = "complete" if all_status else "error"
        self._fdb.Flow.update(self.flow)

        self.status_server.stop()
        # fdb = TaskFlowsDB()
        # print(fdb.history_flow_task(self.flow.id))
        return

    @staticmethod
    def run_cmd_task(task: CommandTask, call_log: Callable[[Any], None], exclude_nodes: List[int] = None) -> Tuple[bool, List[int]]:
        """Run one command step; returns (had_errors, failed_node_ids)."""
        task = CMDTask(task, 0, call_log, exclude_nodes=exclude_nodes)
        task.start()
        return task.status_dict["error"] > 0, task.status_dict["error_nodes"]

    @staticmethod
    def run_transfer_task(task: TransferTask, call_log: Callable[[Any], None], exclude_nodes: List[int] = None) -> Tuple[bool, List[int]]:
        """Run one transfer step; a task with a remote source node is
        delegated to that node, otherwise it runs in-process.
        Returns (had_errors, failed_node_ids)."""
        if task.src_node_task_id != 0:
            task = NodeFiletransferTask(task, call_log, exclude_nodes=exclude_nodes, the_log_id=None)
            task.start()
            return task.status_dict["error"] > 0, task.status_dict["error_nodes"]
        else:
            task = FiletransferTask(task, call_log, exclude_nodes=exclude_nodes)
            task.start()
            return task.status_dict["error"] > 0, task.status_dict["error_nodes"]
|
||||
|
||||
|
||||
def flow_running_log(task_id: int, call_log: Callable[[Union[str,dict]], None], timeout:float = 3.0) -> str:
    """Attach to a running flow and stream its status updates to *call_log*.

    Waits up to *timeout* seconds for the flow's status socket to appear;
    returns an error string on timeout, otherwise "" when the stream ends.
    """
    socket_file = "{}/flow_task_{}".format(_SOCKET_FILE_DIR, task_id)
    poll_interval = 0.05
    while not os.path.exists(socket_file):
        if timeout <= 0:
            return "Task startup timeout"
        time.sleep(poll_interval)
        timeout -= poll_interval

    client = StatusClient(socket_file, callback=call_log)
    client.connect()
    client.wait_receive()  # blocks until the flow's status stream closes
    return ""
|
||||
|
||||
def flow_useful_version(ver: str) -> bool:
    """Return True when panel version *ver* (e.g. "11.4.2") is >= 11.4.

    Any unparsable, empty, or too-short version string is treated as
    unsupported and yields False.
    """
    # TODO: confirm the minimum-version check before release (it was
    # temporarily bypassed during development).
    try:
        ver_list = [int(i) for i in ver.split(".")]
        if ver_list[0] > 11:
            return True
        if ver_list[0] == 11 and ver_list[1] >= 4:
            return True
    # Fix: was a bare except; only the expected parse failures are ignored
    # (non-numeric parts, too-short versions, non-string input).
    except (ValueError, IndexError, AttributeError):
        pass
    return False
|
||||
Reference in New Issue
Block a user