|
|
|
#!/usr/bin/python
|
|
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
|
|
# @Time : 2019-10-15
|
|
|
|
# @Author : lework
|
|
|
|
# @Desc : Collect supervisor process state information and expose it to Prometheus.
|
|
|
|
|
|
|
|
# [program:supervisor_exporter]
|
|
|
|
# process_name=%(program_name)s
|
|
|
|
# command=/usr/bin/python /root/scripts/supervisor_exporter.py
|
|
|
|
# autostart=true
|
|
|
|
# autorestart=true
|
|
|
|
# redirect_stderr=true
|
|
|
|
# stdout_logfile=/var/log/supervisor/supervisor_exporter.log
|
|
|
|
# stdout_logfile_maxbytes=50MB
|
|
|
|
# stdout_logfile_backups=3
|
|
|
|
# buffer_size=10
|
|
|
|
|
|
|
|
import sys
|
|
|
|
from time import sleep
|
|
|
|
from supervisor.xmlrpc import SupervisorTransport
|
|
|
|
from prometheus_client import Gauge, Counter, CollectorRegistry ,generate_latest, start_http_server
|
|
|
|
|
|
|
|
PY2 = sys.version_info[0] == 2
|
|
|
|
PY3 = sys.version_info[0] == 3
|
|
|
|
|
|
|
|
if PY3:
|
|
|
|
from xmlrpc.client import Transport, ServerProxy, Fault
|
|
|
|
from http.server import BaseHTTPRequestHandler, HTTPServer
|
|
|
|
else:
|
|
|
|
from xmlrpclib import Transport, ServerProxy, Fault
|
|
|
|
from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
|
|
|
|
|
|
|
|
|
|
|
|
def get_supervisord_conn(supervisord_url, supervisord_user, supervisord_pass):
    """Build an XML-RPC proxy for talking to supervisord.

    :param supervisord_url: server URL handed to ``SupervisorTransport``
    :param supervisord_user: username handed to ``SupervisorTransport``
    :param supervisord_pass: password handed to ``SupervisorTransport``
    :return: a ``ServerProxy`` that sends its requests through that transport
    """
    # The proxy URI is a fixed placeholder; the real endpoint is whatever the
    # transport was given (presumably it does the actual connecting -- see
    # the supervisor.xmlrpc documentation).
    return ServerProxy(
        'http://127.0.0.1',
        transport=SupervisorTransport(supervisord_user, supervisord_pass, supervisord_url),
    )
|
|
|
|
|
|
|
|
def is_runing(state):
    """Tell whether a supervisor state code counts as "up".

    Only STARTING (10) and RUNNING (20) are accepted.  The other codes
    (STOPPED 0, BACKOFF 30, STOPPING 40, EXITED 100, FATAL 200,
    UNKNOWN 1000) and any other value yield ``False``.

    :param state: state code to classify
    :return: ``True`` for 10 or 20, ``False`` otherwise
    """
    up_codes = (10, 20)  # STARTING, RUNNING
    return state in up_codes
|
|
|
|
|
|
|
|
|
|
|
|
def get_metrics():
    """Query supervisord and return a fresh registry describing every process.

    Connection settings are read from the module-level names
    ``supervisord_url``, ``supervisord_user`` and ``supervisord_pass``.
    A new ``CollectorRegistry`` is built on every call, so each call starts
    from empty metrics.

    For every entry returned by ``supervisor.getAllProcessInfo()`` the
    following series are recorded, labelled with ``name`` and ``group``:

    * ``state``              -- the numeric state code
    * ``exit_status``        -- the reported exit status
    * ``up``                 -- 1 when ``is_runing(state)`` is true, else 0
    * ``start_time_seconds`` -- the reported start value, only for "up" processes

    :return: the populated registry, or an empty one when supervisord could
             not be queried (the error is printed, not raised)
    """
    collect_reg = CollectorRegistry(auto_describe=True)

    try:
        s = get_supervisord_conn(supervisord_url, supervisord_user, supervisord_pass)
        data = s.supervisor.getAllProcessInfo()
    except Exception as e:
        # Best effort: a scrape while supervisord is unreachable yields an
        # empty registry instead of an error.
        print("unable to call supervisord: %s" % e)
        return collect_reg

    label_names = ('name', 'group')

    metric_state = Gauge('state', "Process State", labelnames=label_names,
                         subsystem='supervisord', registry=collect_reg)
    metric_exit_status = Gauge('exit_status', "Process Exit Status", labelnames=label_names,
                               subsystem='supervisord', registry=collect_reg)
    metric_up = Gauge('up', "Process Up", labelnames=label_names,
                      subsystem='supervisord', registry=collect_reg)
    # NOTE(review): a start timestamp is conceptually a gauge.  The Counter is
    # kept so the exported series stays the same for existing consumers; it is
    # only ever incremented once from zero because the registry is rebuilt on
    # every call.
    metric_start_time_seconds = Counter('start_time_seconds', "Process start time",
                                        labelnames=label_names, subsystem='supervisord',
                                        registry=collect_reg)

    for item in data:
        label_values = (item.get('name', ''), item.get('group', ''))

        # Numeric fallbacks: an empty-string default cannot be stored in a
        # metric.  1000 is supervisor's UNKNOWN state code.
        state = item.get('state', 1000)
        running = is_runing(state)

        metric_state.labels(*label_values).set(state)
        metric_exit_status.labels(*label_values).set(item.get('exitstatus', 0))
        metric_up.labels(*label_values).set(1 if running else 0)

        if running:
            metric_start_time_seconds.labels(*label_values).inc(item.get('start', 0))

    return collect_reg
|
|
|
|
|
|
|
|
|
|
|
|
class myHandler(BaseHTTPRequestHandler):
    """HTTP handler that serves a landing page and the metrics endpoint."""

    def do_GET(self):
        """Answer a GET request.

        * ``/``         -- 200 with a short plain-text hint pointing at ``/metrics``
        * ``/metrics``  -- 200 with ``generate_latest(get_metrics())``
        * anything else -- 404 with the body ``not found``

        A query string, if present, is ignored when choosing the route.
        """
        route = self.path.split('?', 1)[0]
        status = 200

        # Build the body first so the status line is only sent once the
        # payload is known.
        if route == "/":
            data = b"hello, supervistor.\r\n\r\n/metrics"
        elif route == "/metrics":
            data = generate_latest(get_metrics())
        else:
            status = 404
            data = b"not found"

        self.send_response(status)
        self.send_header('Content-type', 'text/plain')
        self.send_header('Content-Length', str(len(data)))
        self.end_headers()

        self.wfile.write(data)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Connection settings; get_metrics() reads these as module-level globals.
    supervisord_url = "unix:///var/run/supervisor.sock"
    supervisord_user = ""
    supervisord_pass = ""

    PORT_NUMBER = 8081

    # Create a web server and define the handler to manage the incoming
    # requests.  Built outside the try block so `server` is always bound by
    # the time the cleanup below runs.
    server = HTTPServer(('', PORT_NUMBER), myHandler)
    # %-formatting prints the same text on Python 2 and 3.
    print('Started httpserver on port %s' % PORT_NUMBER)

    try:
        # Wait forever for incoming http requests
        server.serve_forever()
    except KeyboardInterrupt:
        print('^C received, shutting down the web server')
    finally:
        # Release the listening socket however the serve loop ended.
        server.server_close()
|
|
|
|
|