HEX
Server: nginx/1.22.0
System: Linux iZuf6jdxbygmf6cco977lcZ 5.10.84-10.4.al8.x86_64 #1 SMP Tue Apr 12 12:31:07 CST 2022 x86_64
User: root (0)
PHP: 7.4.29
Disabled: passthru,exec,system,chroot,chgrp,chown,shell_exec,proc_open,proc_get_status,ini_alter,ini_restore,dl,readlink,symlink,popepassthru,stream_socket_server,fsocket,popen
Upload Files
File: //usr/local/aegis/PythonLoaderTemp/third_party/aegis_checker/offline/check_thread_hang.py
# -*- coding: utf-8 -*-

import sys
import re
import logging

from aegis_checker.common.aegis_client_log_parser import LogObserver, LOG_INFO, LOG_WARN
from aegis_checker.info.check_result import *


class ThreadHangLogObserver(LogObserver):
    """Log observer that collects "thread hang" warnings and reports the most frequent one.

    ``on_log`` records every warning-level line whose content starts with
    ``thread hang:``; ``on_end`` logs each recorded event, counts how often each
    distinct content occurred, and reports the most frequent one through
    ``set_root_cause``.
    """

    # Compiled once for all log lines. It is used with ``match``, so it only
    # accepts content that *starts* with "thread hang:" followed by at least
    # one more character.
    _THREAD_HANG_PATTERN = re.compile(r"thread hang:.+")

    def __init__(self):
        # One dict per matching log line, with "date", "time" and "content" keys.
        self.__thread_hang_event = []

    def on_end(self, success):
        """Summarise the recorded thread-hang events once log parsing is finished.

        Does nothing when no event was recorded. Otherwise emits one warning per
        recorded event, then reports the most frequently seen hang content via
        ``set_root_cause``.

        :param success: not used by this observer
        :return: None
        """
        if not self.__thread_hang_event:
            return

        # Occurrence count per distinct hang content.
        hang_thread_dict = {}
        for thread_hang_event in self.__thread_hang_event:
            hang_info = thread_hang_event["content"]
            hang_thread_dict[hang_info] = hang_thread_dict.get(hang_info, 0) + 1

            logging.warning("offline issue may be caused by thread hang when %s %s, log is %s",
                            thread_hang_event["date"], thread_hang_event["time"], hang_info)

        top_hang_thread = ""
        max_count = 0
        # Iterate (content, count) pairs. Iterating the dict itself yields only
        # the keys, and unpacking a key string into two names raises ValueError.
        for hang_info, count in hang_thread_dict.items():
            # Strict comparison: on a tie the entry seen first is kept.
            if max_count < count:
                max_count = count
                top_hang_thread = hang_info
        # NOTE(review): the message template and its argument are passed
        # separately, which assumes set_root_cause applies the %-formatting
        # itself -- confirm against its definition.
        set_root_cause(ROOT_CAUSE_THREAD_HANG, "offline issue may be caused by thread hang, %s", top_hang_thread)

    def on_log(self, log_date, log_time, log_type, content, line, line_num, log_file_path):
        """Record a log line if it is a thread-hang warning.

        Example of a matching line::

            2021-03-24 03:23:04 [Warn] thread hang:rtap::run

        :param log_date: date field of the log line, stored with the event
        :param log_time: time field of the log line, stored with the event
        :param log_type: level field of the log line; only LOG_WARN is recorded
        :param content: message part of the log line, matched against the
            thread-hang pattern and stored with the event
        :param line: not used by this observer
        :param line_num: not used by this observer
        :param log_file_path: not used by this observer
        :return: None
        """
        if log_type == LOG_WARN and self._THREAD_HANG_PATTERN.match(content):
            self.__thread_hang_event.append({
                "date": log_date,
                "time": log_time,
                "content": content,
            })


def test():
    """Manual driver: feed the log file named by ``sys.argv[1]`` to a ThreadHangLogObserver.

    Each line of the form ``YYYY-MM-DD HH:MM:SS [Level] message`` is split into
    its fields and passed to ``on_log`` together with the raw line, its
    zero-based index in the file and the file path; lines that do not match are
    skipped. ``on_end(True)`` is called after the last line.
    """
    logging.basicConfig(format='%(asctime)s [%(filename)s][%(levelname)s] %(message)s', level=logging.DEBUG)
    observer = ThreadHangLogObserver()
    path = sys.argv[1]
    line_pattern = re.compile(r"^(\d{4}-\d{2}-\d{2}) (\d{2}:\d{2}:\d{2}) \[(\w+)\] (.+)", re.I)
    with open(path) as log_file:
        # The index counts every line of the file, including skipped ones.
        for index, raw_line in enumerate(log_file):
            parsed = line_pattern.match(raw_line)
            if parsed is None:
                continue
            date_part, time_part, level, message = parsed.groups()
            observer.on_log(date_part, time_part, level, message, raw_line, index, path)

        observer.on_end(True)


# Script entry point: run the manual driver only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    test()