diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt index 1fdde3664644ae49efa9887f8b8cb444541f4c9b..e11e5e71eb41280a22fd6311d5ef1b06ff3d782c 100644 --- a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt +++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/CMakeLists.txt @@ -4,5 +4,8 @@ file(GLOB CPU_PATROL_SRC *.c) add_library(cpu_patrol SHARED ${CPU_PATROL_SRC}) -TARGET_LINK_LIBRARIES(cpu_patrol pthread -ldl) +include_directories(${CMAKE_SOURCE_DIR}/../../../libso/) +link_directories(${CMAKE_SOURCE_DIR}/../../../libso/build/xalarm/) + +TARGET_LINK_LIBRARIES(cpu_patrol pthread -ldl ${CMAKE_SOURCE_DIR}/../../../libso/build/xalarm/libxalarm.so) diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c index 6f161df3ba31198d6531f155103faeffb06731e1..9f8d80c5eb98645dd086523af480373f2f3d3aba 100644 --- a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c +++ b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "cat_structs.h" #include "cpu_patrol_result.h" @@ -166,28 +167,81 @@ static cat_return_t do_cpu_core_offline(unsigned int cpu) return CAT_ERR; } +/* + * 解析字符串(\d, \d)为一个pair +*/ +void parse_string(char* str, int* arr, int size) { + char* token; + int i = 0; + + token = strtok(str, ","); + while (token != NULL && i < size) { + arr[i] = atoi(token); + i++; + token = strtok(NULL, ","); + } +} + +/* + * 根据core_id获取socket_id +*/ +int get_socket_id(int core_id) { + FILE *file; + char line[MAX_LINE_LEN]; + int id_pair[PAIR_LEN]; + int socket_id; + + file = popen("lscpu -p=cpu,socket | grep '[0-9]\\+,[0-9]\\+'", "r"); + if (file == NULL) { + perror("Error opening file"); + return -1; + } + + while (fgets(line, MAX_LINE_LEN, file) != 
NULL) { + parse_string(line, id_pair, 2); + if (id_pair[0] == core_id) { + return id_pair[1]; + } + } + + fclose(file); + return -1; +} + /* * 功能说明:隔离巡检故障核,并把成功隔离的故障核添加到隔离列表 */ + void isolate_cpu_core(core_list_st *isolated_core_list, const core_list_st *fault_list) { + int ret, core_id, socket_id; unsigned int total_core = sysconf(_SC_NPROCESSORS_CONF); if (total_core == -1) { CAT_LOG_E("Get total cpu cores failed."); return; } for (unsigned short i = 0; i < fault_list->current_nums; i++) { + core_id = fault_list->order_list[i]; + socket_id = get_socket_id(core_id); // 0核不隔离 - if ((fault_list->order_list[i] >= total_core) || (fault_list->order_list[i] == 0)) { - CAT_LOG_E("Isolate cpu core failed, invalid core id(%u)", fault_list->order_list[i]); + if ((core_id >= total_core) || (core_id == 0)) { + CAT_LOG_E("Isolate cpu core failed, invalid core id(%u)", core_id); continue; } - if (get_cpu_core_status(fault_list->order_list[i]) != CPU_STATE_ONLINE) { + if (get_cpu_core_status(core_id) != CPU_STATE_ONLINE) { continue; } - if (do_cpu_core_offline(fault_list->order_list[i]) == CAT_OK) { - (void)insert_core_to_list(isolated_core_list, fault_list->order_list[i]); - CAT_LOG_I(":%d", fault_list->order_list[i]); + if (socket_id == -1) { + CAT_LOG_E("Get socket id failed, core id is (%u)", core_id); + } else { + int ret = cpu_alarm_Report(UCE, CPU, BMC, BMC_COMMAND, ASSERTION, socket_id, core_id); + if (ret != 0) { + CAT_LOG_E("Failed to report to xlarm"); + } + } + if (do_cpu_core_offline(core_id) == CAT_OK) { + (void)insert_core_to_list(isolated_core_list, core_id); + CAT_LOG_I(":%d", core_id); } } } diff --git a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h index f18d5cef57b19952c6b2b73faa8b651010989c16..92dcdc3099d3f28b98b36cebcfe121e8d3b1fd0d 100644 --- a/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h +++ 
b/sysSentry-1.0.2/src/c/catcli/catlib/plugin/cpu_patrol/cpu_patrol_result.h @@ -8,6 +8,9 @@ #define CPU_PATH_FORMAT "/sys/devices/system/cpu/cpu%d/online" #define PATROL_RESULT_LEN 512 #define MAX_CPU_SYS_FILE_PATH_LEN 256 +#define BMC_COMMAND 0x0001 +#define MAX_LINE_LEN 256 +#define PAIR_LEN 2 typedef enum { CPU_STATE_OFFLINE = '0', diff --git a/sysSentry-1.0.2/src/libso/xalarm/CMakeLists.txt b/sysSentry-1.0.2/src/libso/xalarm/CMakeLists.txt index 77340c7c0ab43e15779884e72489733bc10d95bd..d5d16d4914280dca123c0f4fa647ea878df61bf7 100644 --- a/sysSentry-1.0.2/src/libso/xalarm/CMakeLists.txt +++ b/sysSentry-1.0.2/src/libso/xalarm/CMakeLists.txt @@ -1,7 +1,7 @@ # Copyright (c) Huawei Technologies Co., Ltd. 2023-2024. All rights reserved. # Description: cmake file for libxalarm add_library(xalarm SHARED register_xalarm.c) -target_link_libraries(xalarm PRIVATE pthread) +target_link_libraries(xalarm PRIVATE pthread -ljson-c) set_target_properties(xalarm PROPERTIES LINK_FLAGS "-Wl,-z,relro -Wl,-z,now -Wl,-z,noexecstack -Wtrampolines") set_target_properties(xalarm PROPERTIES CMAKE_C_FLAGS "-shared -fPIC -fstack-protector-strong -D_FORTIFY_SOURCE=2 -O2 -Wall -Werror -g") diff --git a/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.c b/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.c index 90d070d34b0c9a3b3666b2f2fdecb095b52d6c27..152c078aa1c62c55b98a60336e63e63419418136 100644 --- a/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.c +++ b/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -23,6 +24,8 @@ #include #include +#include + #include "register_xalarm.h" #define DIR_XALARM "/var/run/xalarm" @@ -115,17 +118,13 @@ static int create_unix_socket(const char *path) } } - ret = memset_s(&alarm_addr, sizeof(alarm_addr), 0, sizeof(alarm_addr)); - if (ret != 0) { - printf("create_unix_socket: memset_s alarm_addr failed, ret: %d\n", ret); + if (memset(&alarm_addr, 0, sizeof(alarm_addr)) == 
NULL) { + printf("create_unix_socket: memset alarm_addr failed, ret: %d\n", ret); goto remove_dir; } alarm_addr.sun_family = AF_UNIX; - ret = strncpy_s(alarm_addr.sun_path, sizeof(alarm_addr.sun_path), path, sizeof(alarm_addr.sun_path) - 1); - if (ret != 0) { - printf("create_unix_socket: strncpy_s alarm_addr.sun_path failed, ret: %d\n", ret); - goto remove_dir; - } + strncpy(alarm_addr.sun_path, path, sizeof(alarm_addr.sun_path) - 1); + if (bind(fd, (struct sockaddr *)&alarm_addr, sizeof(alarm_addr.sun_family) + strlen(alarm_addr.sun_path)) < 0) { printf("bind socket failed:%s\n", strerror(errno)); goto remove_dir; @@ -197,8 +196,7 @@ static void set_alarm_id(struct alarm_subscription_info id_filter) { int i; - memset_s(g_register_info.alarm_enable_bitmap, MAX_NUM_OF_ALARM_ID * sizeof(char), - 0, MAX_NUM_OF_ALARM_ID * sizeof(char)); + memset(g_register_info.alarm_enable_bitmap, 0, MAX_NUM_OF_ALARM_ID * sizeof(char)); for (i = 0; i < id_filter.len; i++) { g_register_info.alarm_enable_bitmap[id_filter.id_list[i] - MIN_ALARM_ID] = ALARM_ENABLED; } @@ -300,8 +298,7 @@ void xalarm_UnRegister(int client_id) } } - memset_s(g_register_info.alarm_enable_bitmap, MAX_NUM_OF_ALARM_ID * sizeof(char), - 0, MAX_NUM_OF_ALARM_ID * sizeof(char)); + memset(g_register_info.alarm_enable_bitmap, 0, MAX_NUM_OF_ALARM_ID * sizeof(char)); g_register_info.callback = NULL; g_register_info.is_registered = false; } @@ -333,7 +330,7 @@ char *xalarm_getdesc(const struct alarm_info *palarm) return palarm == NULL ? 
NULL : (char *)palarm->pucParas; } -static int init_report_addr(struct sockaddr_un *alarm_addr) +static int init_report_addr(struct sockaddr_un *alarm_addr, char *report_path) { int ret; @@ -342,18 +339,12 @@ static int init_report_addr(struct sockaddr_un *alarm_addr) return -1; } - ret = memset_s(alarm_addr, sizeof(struct sockaddr_un), 0, sizeof(struct sockaddr_un)); - if (ret != 0) { - fprintf(stderr, "%s: memset_s alarm_addr failed, ret: %d\n", __func__, ret); + if (memset(alarm_addr, 0, sizeof(struct sockaddr_un)) == NULL) { + fprintf(stderr, "%s: memset alarm_addr failed, ret: %d\n", __func__, ret); return -1; } alarm_addr->sun_family = AF_UNIX; - ret = strncpy_s(alarm_addr->sun_path, sizeof(alarm_addr->sun_path), PATH_REPORT_ALARM, - sizeof(alarm_addr->sun_path) - 1); - if (ret != 0) { - fprintf(stderr, "%s: strncpy_s alarm_addr->sun_path failed, ret: %d\n", __func__, ret); - return -1; - } + strncpy(alarm_addr->sun_path, report_path, sizeof(alarm_addr->sun_path) - 1); return 0; } @@ -372,9 +363,8 @@ int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, return -1; } - ret = memset_s(&info, sizeof(struct alarm_info), 0, sizeof(struct alarm_info)); - if (ret != 0) { - fprintf(stderr, "%s: memset_s info failed, ret: %d\n", __func__, ret); + if (memset(&info, 0, sizeof(struct alarm_info)) == NULL) { + fprintf(stderr, "%s: memset info failed, ret: %d\n", __func__, ret); return -1; } info.usAlarmId = usAlarmId; @@ -382,11 +372,7 @@ int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, info.ucAlarmType = ucAlarmType; gettimeofday(&info.AlarmTime, NULL); if (pucParas != NULL) { - ret = strncpy_s((char *)info.pucParas, MAX_PARAS_LEN, (char *)pucParas, MAX_PARAS_LEN - 1); - if (ret != 0) { - fprintf(stderr, "%s: strncpy_s info.pucParas failed, ret: %d\n", __func__, ret); - return -1; - } + strncpy((char *)info.pucParas, (char *)pucParas, MAX_PARAS_LEN - 1); } fd = socket(AF_UNIX, SOCK_DGRAM, 0); @@ -395,7 +381,7 @@ int 
xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, return -1; } - ret = init_report_addr(&alarm_addr); + ret = init_report_addr(&alarm_addr, PATH_REPORT_ALARM); if (ret == -1) { close(fd); return -1; @@ -426,6 +412,112 @@ int xalarm_Report(unsigned short usAlarmId, unsigned char ucAlarmLevel, } +bool is_valid_report_module(unsigned short module) { + switch ((int) module) { + case CPU: + return true; + default: + return false; + } +} + +bool is_valid_report_type(unsigned short type) { + switch ((int) type) { + case CE: + case UCE: + return true; + default: + return false; + } +} + +bool is_valid_report_trans_to(unsigned short trans_to) { + switch ((int) trans_to) { + case BMC: + return true; + default: + return false; + } +} + +bool check_params(unsigned short type, unsigned short module, unsigned short trans_to, int report_info_len) { + bool is_valid_type = is_valid_report_type(type); + bool is_valid_module = is_valid_report_module(module); + bool is_valid_trans_to = is_valid_report_trans_to(trans_to); + bool is_valid_report_info_len = (report_info_len >= 0 && report_info_len <= 999) ? 
true : false; + + return is_valid_type && is_valid_module && is_valid_trans_to && is_valid_report_info_len; +} + +int cpu_alarm_Report(unsigned short type, unsigned short module, unsigned short trans_to, unsigned short command, + unsigned short event_type, int socket_id, int core_id) +{ + int ret, fd; + bool is_valid; + int report_info_len; + char report_info[MAX_CHAR_LEN]; + char alarm_msg[MAX_CHAR_LEN]; + struct sockaddr_un alarm_addr; + + fd = socket(AF_UNIX, SOCK_STREAM, 0); + if (fd < 0) { + fprintf(stderr, "%s socket create error: %s\n", __func__, strerror(errno)); + return -1; + } + + ret = init_report_addr(&alarm_addr, PATH_REPORT_CPU_ALARM); + if (ret == -1) { + close(fd); + return -1; + } + + sprintf(report_info, "%u %u %d %d", command, event_type, socket_id, core_id); + + report_info_len = strlen(report_info); + is_valid = check_params(type, module, trans_to, report_info_len); + if (!is_valid) { + fprintf(stderr, "%s: cpu_alarm: invalid params\n", __func__); + close(fd); + return -1; + } + + sprintf(alarm_msg, "REP%1u%1u%02u%03d%s", type, module, trans_to, report_info_len, report_info); + + while (true) { + ret = connect(fd, (struct sockaddr *)&alarm_addr, offsetof(struct sockaddr_un, sun_path) + strlen(alarm_addr.sun_path)); + + if (ret < 0) { + if (errno == EINTR) { + /* interrupted by signal, ignore */ + continue; + } else { + fprintf(stderr, "%s: connect failed errno: %d\n", __func__, errno); + } + } + + ret = write(fd, alarm_msg, strlen(alarm_msg)); + if (ret < 0) { + if (errno == EINTR) { + /* interrupted by signal, ignore */ + continue; + } else { + fprintf(stderr, "%s: write failed errno: %d\n", __func__, errno); + } + } else if (ret == 0) { + fprintf(stderr, "%s: write failed, ret is 0\n", __func__); + } else { + if (ret != strlen(alarm_msg)) { + fprintf(stderr, "%s write failed, ret:%d, len:%d\n", __func__, ret, strlen(alarm_msg)); + } + } + break; + } + close(fd); + + return (ret > 0) ? 
0 : -1; +} + + /** * @brief send data to socket * @@ -447,18 +539,13 @@ int send_data_to_socket(const char *socket_path, const char *message) } // set socket address - ret = memset_s(&addr, sizeof(addr), 0, sizeof(struct sockaddr_un)); - if (ret != 0) { - fprintf(stderr, "%s: memset_s info failed, ret: %d\n", __func__, ret); + if (memset(&addr, 0, sizeof(struct sockaddr_un)) == NULL) { + fprintf(stderr, "%s: memset info failed.\n", __func__); return RETURE_CODE_FAIL; } addr.sun_family = AF_UNIX; - ret = strncpy_s(addr.sun_path, sizeof(addr.sun_path), socket_path, sizeof(addr.sun_path) - 1); - if (ret != 0) { - fprintf(stderr, "%s: strncpy_s failed\n", __func__); - return RETURE_CODE_FAIL; - } + strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1); // connect socket if (connect(sockfd, (struct sockaddr *)&addr, sizeof(struct sockaddr_un)) == -1) { fprintf(stderr, "failed to connect socket %s\n", socket_path); @@ -561,20 +648,13 @@ int report_result(const char *task_name, enum RESULT_LEVEL result_level, const c } char message[RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN]; - int ret = memset_s(message, sizeof(message), 0, RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN); - if (ret != 0) { - fprintf(stderr, "%s: memset_s message failed", __func__); + if (memset(message, 0, RESULT_INFO_HEAD_LEN + RESULT_INFO_MAX_LEN) == NULL) { + fprintf(stderr, "%s: memset message failed", __func__); json_object_put(send_data); return RETURE_CODE_FAIL; } - ret = sprintf_s(message, sizeof(message) - 1, "%s%04d%s", RESULT_INFO_HEAD_MAGIC, - send_data_len, result_json_string); - if (ret < 0) { - fprintf(stderr, "%s: failed to send result message (%s) to sysSentry, sprintf_s failed\n", __func__, message); - json_object_put(send_data); - return RETURE_CODE_FAIL; - } + sprintf(message, "%s%04d%s", RESULT_INFO_HEAD_MAGIC, send_data_len, result_json_string); if (send_data_to_socket(RESULT_REPORT_SOCKET, message)) { fprintf(stderr, "%s: failed to send result message (%s) to sysSentry!\n", 
__func__, message); @@ -585,3 +665,4 @@ int report_result(const char *task_name, enum RESULT_LEVEL result_level, const c json_object_put(send_data); return RETURE_CODE_SUCCESS; } + diff --git a/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.h b/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.h index f17dbd62b891878c3c18a2efe237ae25d88bdc33..1f26c6a06588a6b9b3b570f6e7e6bbbef02970c7 100644 --- a/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.h +++ b/sysSentry-1.0.2/src/libso/xalarm/register_xalarm.h @@ -30,6 +30,9 @@ #define MAX_NUM_OF_ALARM_ID 128 +#define PATH_REPORT_CPU_ALARM "/var/run/sysSentry/report.sock" +#define MAX_CHAR_LEN 128 + /* * usAlarmId:unsigned short,告警id,某一类故障一个id,id定义避免重复。 * ucAlarmLevel: 告警级别,从FATAL到DEBUG @@ -45,6 +48,25 @@ struct alarm_info { struct timeval AlarmTime; char pucParas[ALARM_INFO_MAX_PARAS_LEN]; }; + +enum report_module { + CPU = 0x00 +}; +enum report_type { + CE = 0x00, + UCE = 0x01 +}; +enum report_trans_to { + BMC = 0x01 +}; + +enum report_event_type { + ASSERTION = 0x00, + DEASSERTION = 0x01 +}; + +int cpu_alarm_Report(unsigned short type, unsigned short module, unsigned short trans_to, unsigned short command, + unsigned short event_type, int socket_id, int core_id); /* * hook回调函数处理 diff --git a/sysSentry-1.0.2/src/python/syssentry/cpu_alarm.py b/sysSentry-1.0.2/src/python/syssentry/cpu_alarm.py new file mode 100644 index 0000000000000000000000000000000000000000..d972c42cefe48de564e90f39434548f1d8cd81a1 --- /dev/null +++ b/sysSentry-1.0.2/src/python/syssentry/cpu_alarm.py @@ -0,0 +1,209 @@ +import re +import math +import logging +from enum import Enum + +from .utils import execute_command + +MAX_CORE_ID = 1024 +MAX_SOCKET_ID = 255 +CPU_ALARM_PARAM_LEN = 4 +DEFAULT_CORE_ID_ARRAY_CAPACITY = 32 +BIT_8 = 8 +BIN_PREFIX_LEN = 2 +BINARY = 2 +MIN_DATA_LEN = 0 +MAX_DATA_LEN = 999 + + +class Type(Enum): + CE = 0x00 + UCE = 0x01 + + +class Module(Enum): + CPU = 0x00 + + +class TransTo(Enum): + BMC = 0x01 + + +class EventType(Enum): + 
ASSERTION = 0x00 + DEASSERTION = 0x01 + + +def is_valid_enum_value(enum_type, value): + for enum in enum_type: + if enum.value == value: + return True + return False + + +def check_input_param(cpu_alarm_info): + command, event_type, socket_id, core_id = cpu_alarm_info + if not is_valid_enum_value(EventType, event_type): + raise ValueError("invalid param `event_type`") + if not (0 <= socket_id <= MAX_SOCKET_ID): + raise ValueError("invalid param `socket_id`") + if not (0 <= core_id <= MAX_CORE_ID): + raise ValueError("invalid param `core_id`") + + +def parser_cpu_alarm_info(req_data): + if not req_data: + raise ValueError("recv empty data") + + cpu_alarm_info = list(map(int, req_data.split())) + + if len(cpu_alarm_info) != CPU_ALARM_PARAM_LEN: + logging.debug( + "expected {} params in fixed params, got {}".format( + CPU_ALARM_PARAM_LEN, len(cpu_alarm_info) + ) + ) + raise ValueError + + check_input_param(cpu_alarm_info) + + return cpu_alarm_info + + +def get_cpu_num(): + cmd_list = ["/usr/bin/lscpu"] + ret = execute_command(cmd_list) + if not ret: + return -1 + matches = list(re.finditer(r"^\s*(CPU|CPU\(s\)):\s*\d+$", ret, re.MULTILINE)) + cpu_num_str = matches[0].group(0).split()[-1] + return int(cpu_num_str) + + +def get_cpu_interval(): + cmd_list = ["/usr/sbin/dmidecode", "-t", "processor"] + ret = execute_command(cmd_list) + if not ret: + return -1 + + core_count_pattern = r"Core Count:\s*\d+" + matches_core_count = [ + item.group(0) for item in re.finditer(core_count_pattern, ret) + ] + + thread_count_pattern = r"Thread Count:\s*\d+" + matches_thread_count = [ + item.group(0) for item in re.finditer(thread_count_pattern, ret) + ] + + if len(matches_core_count) != len(matches_thread_count): + logging.error("mismatched core count nums with thread count nums") + raise ValueError("Unexpected core count nums with thread count nums") + + group_num = 0 + for core_count_str, thread_count_str in zip( + matches_core_count, matches_thread_count + ): + core_count = 
int(core_count_str.split()[-1]) + thread_count = int(thread_count_str.split()[-1]) + if core_count == 0 or thread_count < core_count: + logging.error("thread count is less than core count or core count is 0") + raise ValueError("Unexpected value of thread count and core count") + group_num += thread_count // core_count + + cpu_num = get_cpu_num() + + core_siblings_list = [] + if group_num == 0: + logging.error("unexpected value of group num with `0`") + raise ValueError("Unexpected value of group num") + cpu_num_per_group = cpu_num // group_num + + if cpu_num_per_group <= 0: + logging.error("got invalid cpu num per group") + raise ValueError("Unexpected value of cpu num per group") + + for i in range(0, cpu_num, cpu_num_per_group): + core_siblings_list.append((i, i + cpu_num_per_group - 1)) + return core_siblings_list, cpu_num_per_group + + +def get_core_id(core_id_logical, core_siblings_list): + for tup in core_siblings_list: + begin, end = tup + if begin <= core_id_logical <= end: + return core_id_logical - begin + return -1 + + +def upload_bmc(_type, module, command, event_type, socket_id, core_id_logical): + try: + if _type != Type.UCE.value: + logging.error("invalid param `type` for upload bmc") + return + if module != Module.CPU.value: + logging.error("invalid param `module` for upload bmc") + return + + core_siblings_list, cpu_num_per_group = get_cpu_interval() + core_id = get_core_id(core_id_logical, core_siblings_list) + if core_id < 0: + logging.error("cannot map `logical_core_id` to `core_siblings`") + return + + core_id_array_capacity = DEFAULT_CORE_ID_ARRAY_CAPACITY + if cpu_num_per_group > DEFAULT_CORE_ID_ARRAY_CAPACITY: + core_id_array_capacity = math.ceil(cpu_num_per_group / 8) * 8 + + core_id_bin_str = bin(1 << core_id)[BIN_PREFIX_LEN:].zfill( + core_id_array_capacity + ) + core_id_array = [] + + for i in range(0, core_id_array_capacity, BIT_8): + core_id_array.append(int(core_id_bin_str[i : i + BIT_8], BINARY)) + + core_id_array.reverse() + 
core_id_cmd = ["{:#04X}".format(_id) for _id in core_id_array] + + cmd_list = [ + "/usr/bin/ipmitool", + "raw", + "0x30", + "0x92", + "0xdb", + "0x07", + "0x00", + "0x05", + "0x01", + "{:#04X}".format(event_type), + "0x0f", + "{:#04X}".format(socket_id), + ] + cmd_list.extend(core_id_cmd) + execute_command(cmd_list) + except (ValueError, TypeError): + logging.error("failed to resolve bmc params") + + +def check_fixed_param(data, expect): + if not data: + raise ValueError("recv empty param") + data = data.decode() + if isinstance(expect, tuple): + if not expect[0] <= int(data) <= expect[1]: + raise ValueError("expected number range param is not in specified range") + return int(data) + elif type(expect) == type(Enum): + if not is_valid_enum_value(expect, int(data)): + raise ValueError("expected enum value param is not valid") + return int(data) + elif isinstance(expect, int): + if int(data) != expect: + raise ValueError("expected number param is not valid") + return int(data) + elif isinstance(expect, str): + if data != expect: + raise ValueError("expected str param is not valid") + return data + raise NotImplementedError("unexpected param type") diff --git a/sysSentry-1.0.2/src/python/syssentry/syssentry.py b/sysSentry-1.0.2/src/python/syssentry/syssentry.py index de9dcad15af8ff2e8a0164417c0b521b60af8fe5..32b81e30b5da3255c91be365b3c9dc98c2f5b9a7 100644 --- a/sysSentry-1.0.2/src/python/syssentry/syssentry.py +++ b/sysSentry-1.0.2/src/python/syssentry/syssentry.py @@ -36,6 +36,8 @@ from .heartbeat import (heartbeat_timeout_chk, heartbeat_fd_create, from .result import RESULT_MSG_HEAD_LEN, RESULT_MSG_MAGIC_LEN, RESULT_MAGIC from .result import RESULT_LEVEL_ERR_MSG_DICT, ResultLevel from .utils import get_current_time_string +from .cpu_alarm import (upload_bmc, check_fixed_param, parser_cpu_alarm_info, + Type, Module, TransTo, MIN_DATA_LEN, MAX_DATA_LEN) INSPECTOR = None @@ -73,6 +75,47 @@ PID_FILE_FLOCK = None # result-specific socket RESULT_SOCKET_PATH = 
"/var/run/sysSentry/result.sock" +CPU_ALARM_SOCKET_PATH = "/var/run/sysSentry/report.sock" +PARAM_REP_LEN = 3 +PARAM_TYPE_LEN = 1 +PARAM_MODULE_LEN = 1 +PARAM_TRANS_TO_LEN = 2 +PARAM_DATA_LEN = 3 + + +def cpu_alarm_recv(server_socket: socket.socket): + try: + client_socket, _ = server_socket.accept() + logging.debug("cpu alarm fd listen ok") + + data = client_socket.recv(PARAM_REP_LEN) + check_fixed_param(data, "REP") + + data = client_socket.recv(PARAM_TYPE_LEN) + _type = check_fixed_param(data, Type) + + data = client_socket.recv(PARAM_MODULE_LEN) + module = check_fixed_param(data, Module) + + data = client_socket.recv(PARAM_TRANS_TO_LEN) + trans_to = check_fixed_param(data, TransTo) + + data = client_socket.recv(PARAM_DATA_LEN) + data_len = check_fixed_param(data, (MIN_DATA_LEN, MAX_DATA_LEN)) + + data = client_socket.recv(data_len) + + command, event_type, socket_id, core_id = parser_cpu_alarm_info(data) + except socket.error: + logging.error("socket error") + return + except (ValueError, OSError, UnicodeError, TypeError, NotImplementedError): + logging.error("server recv cpu alarm msg failed!") + client_socket.close() + return + + upload_bmc(_type, module, command, event_type, socket_id, core_id) + def msg_data_process(msg_data): """message data process""" @@ -280,6 +323,42 @@ def server_fd_create(): return server_fd +def cpu_alarm_fd_create(): + """create heartbeat fd""" + if not os.path.exists(SENTRY_RUN_DIR): + logging.debug("%s not exist", SENTRY_RUN_DIR) + return None + + try: + cpu_alarm_fd = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + except socket.error: + logging.error("cpu alarm fd create failed") + return None + + cpu_alarm_fd.setblocking(False) + if os.path.exists(CPU_ALARM_SOCKET_PATH): + os.remove(CPU_ALARM_SOCKET_PATH) + + try: + cpu_alarm_fd.bind(CPU_ALARM_SOCKET_PATH) + except OSError: + logging.error("cpu alarm fd bind failed") + cpu_alarm_fd.close() + return None + + os.chmod(CPU_ALARM_SOCKET_PATH, 0o600) + try: + 
cpu_alarm_fd.listen(5) + except OSError: + logging.error("cpu alarm fd listen failed") + cpu_alarm_fd.close() + return None + + logging.debug("%s bind and listen", CPU_ALARM_SOCKET_PATH) + + return cpu_alarm_fd + + def server_result_recv(server_socket: socket.socket): """server result receive""" try: @@ -369,20 +448,20 @@ def main_loop(): server_result_fd.close() return + cpu_alarm_fd = cpu_alarm_fd_create() + if not cpu_alarm_fd: + server_fd.close() + heartbeat_fd.close() + server_result_fd.close() + return + epoll_fd = select.epoll() epoll_fd.register(server_fd.fileno(), select.EPOLLIN) epoll_fd.register(server_result_fd.fileno(), select.EPOLLIN) epoll_fd.register(heartbeat_fd.fileno(), select.EPOLLIN) + epoll_fd.register(cpu_alarm_fd.fileno(), select.EPOLLIN) logging.debug("start main loop") - # onstart_tasks_handle() - for task_type in TasksMap.tasks_dict: - for task_name in TasksMap.tasks_dict.get(task_type): - task = TasksMap.tasks_dict.get(task_type).get(task_name) - if not task: - continue - task.onstart_handle() - while True: try: events_list = epoll_fd.poll(SERVER_EPOLL_TIMEOUT) @@ -393,6 +472,8 @@ def main_loop(): server_result_recv(server_result_fd) elif event_fd == heartbeat_fd.fileno(): heartbeat_recv(heartbeat_fd) + elif event_fd == cpu_alarm_fd.fileno(): + cpu_alarm_recv(cpu_alarm_fd) else: continue diff --git a/sysSentry-1.0.2/src/python/syssentry/utils.py b/sysSentry-1.0.2/src/python/syssentry/utils.py index be80b0f4eb91f08a89c6465b9f9511e578a24e67..21afb8fb34e60be83a9d8d265c9957f496149133 100644 --- a/sysSentry-1.0.2/src/python/syssentry/utils.py +++ b/sysSentry-1.0.2/src/python/syssentry/utils.py @@ -12,6 +12,7 @@ """ some common function """ +import logging import subprocess from datetime import datetime, timezone, timedelta @@ -56,3 +57,21 @@ def get_current_time_string(): current_utc_time = datetime.now(timezone.utc) utc8_timezone = timezone(timedelta(hours=8)) return current_utc_time.astimezone(utc8_timezone).strftime("%Y-%m-%d %H:%M:%S") + 
def execute_command(cmd_list, timeout=None):
    """Run an external command without a shell and return its standard output.

    Args:
        cmd_list: argv-style list of strings; the first element is the
            executable to run.
        timeout: optional limit in seconds handed to ``subprocess.run``.
            ``None`` (the default) waits for the command indefinitely, which
            is the behaviour callers had before this parameter existed.

    Returns:
        The command's stdout decoded as UTF-8, or ``None`` when the command
        could not be started, timed out, produced output that could not be
        decoded, or exited with a non-zero status.
    """
    try:
        process = subprocess.run(
            cmd_list,
            shell=False,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            encoding="utf-8",
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        # subprocess.run has already killed and reaped the child here.
        logging.error("execute command timed out after %s seconds: %s", timeout, cmd_list)
        return None
    except OSError as err:
        # Covers a missing executable, permission problems, fork failures, ...
        logging.error("failed to execute command %s: %s", cmd_list, err)
        return None
    except ValueError as err:
        # Invalid arguments (e.g. embedded NUL) or non-UTF-8 output
        # (UnicodeDecodeError is a ValueError subclass).
        logging.error("failed to execute command %s: %s", cmd_list, err)
        return None

    if process.returncode != 0:
        logging.error(
            "execute command with illegal returncode %d: %s",
            process.returncode,
            cmd_list,
        )
        return None
    return process.stdout