Browse Source

feat: add feishu notice

master
lework 4 years ago
parent
commit
f7c08f171a
  1. 160
      python/supervisor_healthCheck.py

160
python/supervisor_healthCheck.py

@ -21,6 +21,8 @@ import datetime
import platform import platform
import threading import threading
import subprocess import subprocess
import hmac
from hashlib import sha256
from email.header import Header from email.header import Header
from email.mime.text import MIMEText from email.mime.text import MIMEText
from collections import namedtuple from collections import namedtuple
@ -69,15 +71,16 @@ def shell(cmd):
proc.wait() proc.wait()
return (proc.returncode,) + proc.communicate() return (proc.returncode,) + proc.communicate()
def drop_cache(): def drop_cache():
""" """
清除缓存, 1: pagecache, 2: dentries and inodes, 3: 1+2 清除缓存, 1: pagecache, 2: dentries and inodes, 3: 1+2
""" """
cmd = "sync && echo 1 > /proc/sys/vm/drop_caches" cmd = "sync && echo 1 > /proc/sys/vm/drop_caches"
exitcode, _, _ = shell(cmd) exitcode, _, _ = shell(cmd)
return exitcode return exitcode
def get_proc_cpu(pid): def get_proc_cpu(pid):
""" """
获取进程CPU使用率 获取进程CPU使用率
@ -98,6 +101,7 @@ def get_proc_cpu(pid):
return None return None
return cpu_utilization return cpu_utilization
def get_proc_mem(pid, type="rss"): def get_proc_mem(pid, type="rss"):
""" """
获取进程内存使用 获取进程内存使用
@ -170,12 +174,14 @@ class HealthCheck(object):
self.mail_config = None self.mail_config = None
self.wechat_config = None self.wechat_config = None
self.dingding_config = None self.dingding_config = None
self.feishu_config = None
self.supervisord_url = 'unix:///var/run/supervisor.sock' self.supervisord_url = 'unix:///var/run/supervisor.sock'
if 'config' in config: if 'config' in config:
self.mail_config = config['config'].get('mail') self.mail_config = config['config'].get('mail')
self.wechat_config = config['config'].get('wechat') self.wechat_config = config['config'].get('wechat')
self.dingding_config = config['config'].get('dingding') self.dingding_config = config['config'].get('dingding')
self.feishu_config = config['config'].get('feishu')
self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url) self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url)
self.supervisord_user = config['config'].get('supervisordUser', None) self.supervisord_user = config['config'].get('supervisordUser', None)
self.supervisord_pass = config['config'].get('supervisordPass', None) self.supervisord_pass = config['config'].get('supervisordPass', None)
@ -184,7 +190,7 @@ class HealthCheck(object):
self.program_config = config self.program_config = config
# 只保留通知action # 只保留通知action
self.notice_action = ['email', 'wechat', 'dingding'] self.notice_action = ['email', 'wechat', 'dingding', 'feishu']
self.periodSeconds = 5 self.periodSeconds = 5
self.failureThreshold = 3 self.failureThreshold = 3
@ -516,17 +522,17 @@ class HealthCheck(object):
if 'restart' in action_list: if 'restart' in action_list:
restart_result = self.action_supervisor_restart(program) restart_result = self.action_supervisor_restart(program)
msg += '\r\n Restart%s' % restart_result msg += '\r\n**Restart**%s' % restart_result
elif 'exec' in action_list: elif 'exec' in action_list:
action_exec_cmd = config.get('action_exec_cmd') action_exec_cmd = config.get('action_exec_cmd')
exec_result = self.action_exec(program, action_exec_cmd) exec_result = self.action_exec(program, action_exec_cmd)
msg += '\r\n Exec%s' % exec_result msg += '\r\n**Exec**%s' % exec_result
elif 'kill' in action_list: elif 'kill' in action_list:
pid_get = config.get('pidGet', 'supervisor') pid_get = config.get('pidGet', 'supervisor')
pid_file = config.get('pidFile', ) pid_file = config.get('pidFile', )
pid, err = self.get_pid(program, pid_get, pid_file) pid, err = self.get_pid(program, pid_get, pid_file)
kill_result = self.action_kill(program, pid) kill_result = self.action_kill(program, pid)
msg += '\r\n Kill%s' % kill_result msg += '\r\n**Kill**%s' % kill_result
if 'email' in action_list and self.mail_config: if 'email' in action_list and self.mail_config:
self.action_email(program, action_type, msg, check_status) self.action_email(program, action_type, msg, check_status)
@ -534,6 +540,8 @@ class HealthCheck(object):
self.action_wechat(program, action_type, msg, check_status) self.action_wechat(program, action_type, msg, check_status)
if 'dingding' in action_list and self.dingding_config: if 'dingding' in action_list and self.dingding_config:
self.action_dingding(program, action_type, msg, check_status) self.action_dingding(program, action_type, msg, check_status)
if 'feishu' in action_list and self.feishu_config:
self.action_feishu(program, action_type, msg, check_status)
def action_supervisor_restart(self, program): def action_supervisor_restart(self, program):
""" """
@ -710,6 +718,7 @@ class HealthCheck(object):
} }
access_token_url = '/cgi-bin/gettoken?corpid={id}&corpsecret={crt}'.format(id=corpid, crt=secret) access_token_url = '/cgi-bin/gettoken?corpid={id}&corpsecret={crt}'.format(id=corpid, crt=secret)
try: try:
httpClient = httplib.HTTPSConnection(host, timeout=10) httpClient = httplib.HTTPSConnection(host, timeout=10)
httpClient.request("GET", access_token_url, headers=headers) httpClient.request("GET", access_token_url, headers=headers)
@ -806,7 +815,8 @@ class HealthCheck(object):
else: else:
title = "[%s] Health check failed" % program title = "[%s] Health check failed" % program
data = {"msgtype": "markdown", data = {
"msgtype": "markdown",
"markdown": { "markdown": {
"title": title, "title": title,
"text": "#### 详情信息: \n> Program:%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg:%s" % ( "text": "#### 详情信息: \n> Program:%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg:%s" % (
@ -832,6 +842,125 @@ class HealthCheck(object):
self.log(program, '[Action: dingding] send success') self.log(program, '[Action: dingding] send success')
return True return True
def action_feishu(self, program, action_type, msg, check_status):
    """
    Send a health-check notification card to a Feishu custom bot webhook.

    Reads ``webhook`` (the hook token appended to
    ``/open-apis/bot/v2/hook/``) and the optional ``secret`` from
    ``self.feishu_config``. When a secret is configured the request is
    signed with HMAC-SHA256 and the ``timestamp``/``sign`` fields are added
    to the payload.

    :param program: program name, used in the card title/body and for logging
    :param action_type: action description shown in the card's "Action" field
    :param msg: message text shown in the card's "Msg" field
    :param check_status: ``'success'`` renders a green "successful" card;
                         any other value renders a red "failed" card
    :return: True if the webhook answered with ``StatusCode`` 0,
             False on any configuration, transport or API error
    """
    host = "open.feishu.cn"
    secret = self.feishu_config.get('secret')
    webhook = self.feishu_config.get('webhook')

    # Without a webhook token the URL would end in ".../hook/None";
    # fail early instead of sending a request that cannot succeed.
    if not webhook:
        self.log(program, '[Action: feishu] send error: webhook is not configured')
        return False

    headers = {
        'Content-Type': 'application/json'
    }
    send_url = "/open-apis/bot/v2/hook/{webhook}".format(webhook=webhook)

    # Best-effort lookup of the local outbound IP address: connect a UDP
    # socket and read back the local address it was bound to. A failure
    # only leaves the IP field empty.
    ip = ""
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect(('8.8.8.8', 80))
        ip = s.getsockname()[0]
    except Exception as e:
        self.log(program, '[Action: feishu] get ip error %s' % e)
    finally:
        s.close()

    hostname = platform.node().split('.')[0]
    system_platform = platform.platform()
    curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if check_status == 'success':
        title = "[Supervisor] %s Health check successful" % program
        title_color = "green"
    else:
        title = "[Supervisor] %s Health check failed" % program
        title_color = "red"

    content = "**DataTime**: {curr_dt}\n**Program**: {program}\n**IP**: {ip}\n**Hostname**: {hostname}\n**Platfrom**: {platfrom}\n**Action**: {action}\n**Msg**: {msg}".format(
        curr_dt=curr_dt, program=program, ip=ip, hostname=hostname,
        platfrom=system_platform, action=action_type, msg=msg)

    data = {
        "msg_type": "interactive",
        "card": {
            "config": {
                "wide_screen_mode": True,
                "enable_forward": True
            },
            "header": {
                "title": {
                    "content": title,
                    "tag": "plain_text"
                },
                "template": title_color
            },
            "elements": [{
                "tag": "div",
                "text": {
                    "content": "详细信息:",
                    "tag": "lark_md"
                },
                "fields": [
                    {
                        "is_short": False,
                        "text": {
                            "tag": "lark_md",
                            "content": content
                        }
                    }]
            }]
        }
    }

    # Sign only when a secret is really configured. An empty YAML key is
    # loaded as None, which must not be signed as the literal text "None".
    if secret:
        # The signature is HMAC-SHA256 keyed with "<timestamp>\n<secret>"
        # over an empty message, base64 encoded. The timestamp is sent as
        # a string on both Python 2 and Python 3.
        timestamp = str(int(round(time.time())))
        sign_key = '{}\n{}'.format(timestamp, secret).encode('utf-8')
        hmac_code = hmac.new(sign_key, b"", digestmod=sha256).digest()
        data['timestamp'] = timestamp
        data['sign'] = base64.b64encode(hmac_code).decode('utf-8')

    # Pre-bind result so the error log below can always be formatted, even
    # when the failure happens before a response was parsed.
    result = None
    httpClient = httplib.HTTPSConnection(host, timeout=10)
    try:
        httpClient.request("POST", send_url, json.dumps(data), headers=headers)
        response = httpClient.getresponse()
        # Decode explicitly: json.loads() does not accept bytes on every
        # Python 3 version.
        result = json.loads(response.read().decode('utf-8'))
        if result.get('StatusCode', 1) != 0:
            self.log(program, '[Action: feishu] send faild %s' % result)
            return False
    except Exception as e:
        self.log(program, '[Action: feishu] send error [%s] %s' % (result, e))
        return False
    finally:
        httpClient.close()

    self.log(program, '[Action: feishu] send success')
    return True
def start(self): def start(self):
""" """
启动检测 启动检测
@ -900,6 +1029,9 @@ config: # 脚本配置名称,请勿更
# totag: # totag:
# dingding: # 钉钉通知配置 # dingding: # 钉钉通知配置
access_token: access_token:
# feishu: # 飞书通知配置
webhook:
secret:
# 内存方式监控 # 内存方式监控
cat1: # supervisor中配置的program名称 cat1: # supervisor中配置的program名称
@ -912,9 +1044,9 @@ cat1: # supervisor中配置的program名称
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1 initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3 failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1 successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令 execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False sendResolved: True # 是否发送恢复通知 默认: False
# cpu方式监控 # cpu方式监控
cat2: # supervisor中配置的program名称 cat2: # supervisor中配置的program名称
@ -926,9 +1058,9 @@ cat2: # supervisor中配置的program名称
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1 initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3 failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1 successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令 execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False sendResolved: True # 是否发送恢复通知 默认: False
# HTTP方式监控 # HTTP方式监控
cat3: cat3:
@ -946,9 +1078,9 @@ cat3:
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3 timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3 failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1 successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令 execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False sendResolved: True # 是否发送恢复通知 默认: False
# TCP方式监控 # TCP方式监控
cat4: cat4:
@ -960,9 +1092,9 @@ cat4:
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3 timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3 failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1 successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令 execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False sendResolved: True # 是否发送恢复通知 默认: False
""" """
with open(config_file, 'w') as f: with open(config_file, 'w') as f:
f.write(example_config) f.write(example_config)

Loading…
Cancel
Save