Browse Source

feat: add feishu notice

master
lework 4 years ago
parent
commit
f7c08f171a
  1. 174
      python/supervisor_healthCheck.py

174
python/supervisor_healthCheck.py

@ -21,6 +21,8 @@ import datetime @@ -21,6 +21,8 @@ import datetime
import platform
import threading
import subprocess
import hmac
from hashlib import sha256
from email.header import Header
from email.mime.text import MIMEText
from collections import namedtuple
@ -69,15 +71,16 @@ def shell(cmd): @@ -69,15 +71,16 @@ def shell(cmd):
proc.wait()
return (proc.returncode,) + proc.communicate()
def drop_cache():
    """
    Run ``sync`` and then write 1 to /proc/sys/vm/drop_caches.

    Values understood by drop_caches: 1 = pagecache,
    2 = dentries and inodes, 3 = both (1 + 2). Only 1 is used here.

    :return: exit code of the shell command
    """
    result = shell("sync && echo 1 > /proc/sys/vm/drop_caches")
    # shell() yields (returncode, stdout, stderr); only the code is needed.
    returncode, _stdout, _stderr = result
    return returncode
def get_proc_cpu(pid):
"""
获取进程CPU使用率
@ -98,6 +101,7 @@ def get_proc_cpu(pid): @@ -98,6 +101,7 @@ def get_proc_cpu(pid):
return None
return cpu_utilization
def get_proc_mem(pid, type="rss"):
"""
获取进程内存使用
@ -170,12 +174,14 @@ class HealthCheck(object): @@ -170,12 +174,14 @@ class HealthCheck(object):
self.mail_config = None
self.wechat_config = None
self.dingding_config = None
self.feishu_config = None
self.supervisord_url = 'unix:///var/run/supervisor.sock'
if 'config' in config:
self.mail_config = config['config'].get('mail')
self.wechat_config = config['config'].get('wechat')
self.dingding_config = config['config'].get('dingding')
self.feishu_config = config['config'].get('feishu')
self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url)
self.supervisord_user = config['config'].get('supervisordUser', None)
self.supervisord_pass = config['config'].get('supervisordPass', None)
@ -184,7 +190,7 @@ class HealthCheck(object): @@ -184,7 +190,7 @@ class HealthCheck(object):
self.program_config = config
# 只保留通知action
self.notice_action = ['email', 'wechat', 'dingding']
self.notice_action = ['email', 'wechat', 'dingding', 'feishu']
self.periodSeconds = 5
self.failureThreshold = 3
@ -516,17 +522,17 @@ class HealthCheck(object): @@ -516,17 +522,17 @@ class HealthCheck(object):
if 'restart' in action_list:
restart_result = self.action_supervisor_restart(program)
msg += '\r\n Restart%s' % restart_result
msg += '\r\n**Restart**%s' % restart_result
elif 'exec' in action_list:
action_exec_cmd = config.get('action_exec_cmd')
exec_result = self.action_exec(program, action_exec_cmd)
msg += '\r\n Exec%s' % exec_result
msg += '\r\n**Exec**%s' % exec_result
elif 'kill' in action_list:
pid_get = config.get('pidGet', 'supervisor')
pid_file = config.get('pidFile', )
pid, err = self.get_pid(program, pid_get, pid_file)
kill_result = self.action_kill(program, pid)
msg += '\r\n Kill%s' % kill_result
msg += '\r\n**Kill**%s' % kill_result
if 'email' in action_list and self.mail_config:
self.action_email(program, action_type, msg, check_status)
@ -534,6 +540,8 @@ class HealthCheck(object): @@ -534,6 +540,8 @@ class HealthCheck(object):
self.action_wechat(program, action_type, msg, check_status)
if 'dingding' in action_list and self.dingding_config:
self.action_dingding(program, action_type, msg, check_status)
if 'feishu' in action_list and self.feishu_config:
self.action_feishu(program, action_type, msg, check_status)
def action_supervisor_restart(self, program):
"""
@ -710,6 +718,7 @@ class HealthCheck(object): @@ -710,6 +718,7 @@ class HealthCheck(object):
}
access_token_url = '/cgi-bin/gettoken?corpid={id}&corpsecret={crt}'.format(id=corpid, crt=secret)
try:
httpClient = httplib.HTTPSConnection(host, timeout=10)
httpClient.request("GET", access_token_url, headers=headers)
@ -806,13 +815,14 @@ class HealthCheck(object): @@ -806,13 +815,14 @@ class HealthCheck(object):
else:
title = "[%s] Health check failed" % program
data = {"msgtype": "markdown",
"markdown": {
"title": title,
"text": "#### 详情信息: \n> Program:%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg:%s" % (
program, curr_dt, hostname, system_platform, msg)
}
}
data = {
"msgtype": "markdown",
"markdown": {
"title": title,
"text": "#### 详情信息: \n> Program:%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg:%s" % (
program, curr_dt, hostname, system_platform, msg)
}
}
try:
httpClient = httplib.HTTPSConnection(host, timeout=10)
@ -832,6 +842,125 @@ class HealthCheck(object): @@ -832,6 +842,125 @@ class HealthCheck(object):
self.log(program, '[Action: dingding] send success')
return True
def action_feishu(self, program, action_type, msg, check_status):
    """
    Send a health-check notification card to a Feishu custom bot webhook.

    :param program: supervisor program name the notification is about
    :param action_type: action label shown in the card body
    :param msg: detail message shown in the card body
    :param check_status: 'success' produces a green "successful" card,
                         any other value a red "failed" card
    :return: True when the webhook answered with StatusCode 0,
             False on any configuration, transport or API error
    """
    host = "open.feishu.cn"
    secret = self.feishu_config.get('secret')
    webhook = self.feishu_config.get('webhook')

    # Without a webhook token the request would go to ".../hook/None".
    if not webhook:
        self.log(program, '[Action: feishu] webhook is not configured')
        return False

    headers = {
        'Content-Type': 'application/json'
    }
    send_url = "/open-apis/bot/v2/hook/{webhook}".format(webhook=webhook)

    # Best effort: read the local address of a UDP socket connected
    # towards 8.8.8.8. On failure the card simply shows an empty IP.
    ip = ""
    s = None
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(('8.8.8.8', 80))
        ip = s.getsockname()[0]
    except Exception as e:
        self.log(program, '[Action: feishu] get ip error %s' % e)
    finally:
        if s is not None:
            s.close()

    hostname = platform.node().split('.')[0]
    system_platform = platform.platform()
    curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

    if check_status == 'success':
        title = "[Supervisor] %s Health check successful" % program
        title_color = "green"
    else:
        title = "[Supervisor] %s Health check failed" % program
        title_color = "red"

    content = "**DataTime**: {curr_dt}\n**Program**: {program}\n**IP**: {ip}\n**Hostname**: {hostname}\n**Platfrom**: {platfrom}\n**Action**: {action}\n**Msg**: {msg}".format(
        curr_dt=curr_dt, program=program, ip=ip, hostname=hostname,
        platfrom=system_platform, action=action_type, msg=msg)

    data = {
        "msg_type": "interactive",
        "card": {
            "config": {
                "wide_screen_mode": True,
                "enable_forward": True
            },
            "header": {
                "title": {
                    "content": title,
                    "tag": "plain_text"
                },
                "template": title_color
            },
            "elements": [{
                "tag": "div",
                "text": {
                    "content": "详细信息:",
                    "tag": "lark_md"
                },
                "fields": [
                    {
                        "is_short": False,
                        "text": {
                            "tag": "lark_md",
                            "content": content
                        }
                    }]
            }]
        }
    }

    # Sign only when a secret is really configured. An empty YAML value is
    # loaded as None and must not be signed as the literal text "None".
    if secret:
        timestamp = str(int(round(time.time())))
        # The HMAC key is "<timestamp>\n<secret>"; the signed message is empty.
        key = '{}\n{}'.format(timestamp, secret)
        # Python 2 str is already bytes; Python 3 str must be encoded.
        key_enc = key if isinstance(key, bytes) else key.encode('utf-8')
        hmac_code = hmac.new(key_enc, b'', digestmod=sha256).digest()
        data['timestamp'] = timestamp
        data['sign'] = base64.b64encode(hmac_code).decode('utf-8')

    httpClient = None
    # Pre-bind so the except handler can always format it.
    result = None
    try:
        httpClient = httplib.HTTPSConnection(host, timeout=10)
        httpClient.request("POST", send_url, json.dumps(data), headers=headers)
        response = httpClient.getresponse()
        result = json.loads(response.read().decode('utf-8'))
        if result.get('StatusCode', 1) != 0:
            self.log(program, '[Action: feishu] send faild %s' % result)
            return False
    except Exception as e:
        self.log(program, '[Action: feishu] send error [%s] %s' % (result, e))
        return False
    finally:
        if httpClient:
            httpClient.close()

    self.log(program, '[Action: feishu] send success')
    return True
def start(self):
"""
启动检测
@ -900,7 +1029,10 @@ config: # 脚本配置名称,请勿更 @@ -900,7 +1029,10 @@ config: # 脚本配置名称,请勿更
# totag:
# dingding: # 钉钉通知配置
access_token:
# feishu: # 飞书通知配置
webhook:
secret:
# 内存方式监控
cat1: # supervisor中配置的program名称
type: mem # 检查类型: http,tcp,mem,cpu 默认: http
@ -912,9 +1044,9 @@ cat1: # supervisor中配置的program名称 @@ -912,9 +1044,9 @@ cat1: # supervisor中配置的program名称
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart
action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
sendResolved: True # 是否发送恢复通知 默认: False
# cpu方式监控
cat2: # supervisor中配置的program名称
@ -926,9 +1058,9 @@ cat2: # supervisor中配置的program名称 @@ -926,9 +1058,9 @@ cat2: # supervisor中配置的program名称
initialDelaySeconds: 10 # 首次检查等待的时间(以秒为单位), 默认: 1
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart
action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
sendResolved: True # 是否发送恢复通知 默认: False
# HTTP方式监控
cat3:
@ -946,9 +1078,9 @@ cat3: @@ -946,9 +1078,9 @@ cat3:
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart
action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
sendResolved: True # 是否发送恢复通知 默认: False
# TCP方式监控
cat4:
@ -960,9 +1092,9 @@ cat4: @@ -960,9 +1092,9 @@ cat4:
timeoutSeconds: 5 # 检查超时的秒数, 默认: 3
failureThreshold: 3 # 检查成功后,最少连续检查失败多少次才被认定为失败, 默认: 3
successThreshold: 2 # 失败后检查成功的最小连续成功次数, 默认:1
action: restart,email # 触发的动作: restart,exec,kill,email,wechat (restart和exec互斥,同时设置时restart生效) 默认: restart
action: restart,email # 触发的动作: restart,exec,kill,email,wechat,dingding,feishu (restart,exec,kill互斥,同时设置时restart生效) 默认: restart
execCmd: command # action exec 的执行命令
sendResolved: True # 是否发送恢复通知,仅用作于email,wechat. 默认: False
sendResolved: True # 是否发送恢复通知 默认: False
"""
with open(config_file, 'w') as f:
f.write(example_config)

Loading…
Cancel
Save