From 2007428466261489df9525f9f0ce4e8373583f5e Mon Sep 17 00:00:00 2001 From: lework Date: Thu, 15 Oct 2020 17:43:23 +0800 Subject: [PATCH] add --- bat/vmware-batch.bat | 28 ++- python/supervisor_healthCheck.py | 175 ++++++++------- shell/get_proc_mem.sh | 89 ++++++++ shell/keystore.sh | 75 +++++++ shell/kube-logging.sh | 171 +++++++++++++++ shell/linux采集/caiji.sh | 83 ++++++++ shell/linux采集/oos_noak.sh | 354 +++++++++++++++++++++++++++++++ shell/template.sh | 17 ++ shell/utils.sh | 102 +++++++++ 9 files changed, 1014 insertions(+), 80 deletions(-) create mode 100644 shell/get_proc_mem.sh create mode 100644 shell/keystore.sh create mode 100644 shell/kube-logging.sh create mode 100644 shell/linux采集/caiji.sh create mode 100644 shell/linux采集/oos_noak.sh create mode 100644 shell/utils.sh diff --git a/bat/vmware-batch.bat b/bat/vmware-batch.bat index fa55ba7..8096a8b 100644 --- a/bat/vmware-batch.bat +++ b/bat/vmware-batch.bat @@ -9,17 +9,27 @@ IF EXIST "%PROGRAMFILES(X86)%\VMWare\VMWare Workstation\vmrun.exe" SET VMwarePat IF EXIST "%PROGRAMFILES%\VMware\VMware VIX\vmrun.exe" SET VMwarePath=%PROGRAMFILES%\VMware\VMware VIX IF EXIST "%PROGRAMFILES(X86)%\VMware\VMware VIX\vmrun.exe" SET VMRUN=%PROGRAMFILES(X86)%\VMware\VMware VIX -:: -::set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation" +:: VMwareװַ +# set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation" +:: Ŀ¼ set VMpath="D:\Virtual Machines" -set VMname=CentOS_7.4_x64_node +:: +set VMname=CentOS_7.8_x64_node +:: set VMSnapshot=init +:: ½Ŀ set VMcount=5 -set VMowa="D:\vmware owa\CentOS_7.4_x64.ova" +:: owaģλ +set VMowa="D:\vmware owa\CentOS_7.8_x64_base.ova" +:: ģϵͳû set VMuser=root +:: ģϵͳ set VMpass=123456 -set VMipStart=10 +:: set VMnetwork=192.168.77 +:: ipʼַ +set VMipStart=130 + :init @@ -75,6 +85,7 @@ ping /n 2 127.1>nul set /p a=^> /etc/hostname; sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address 
.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui ) +echo. echo : for /l %%a in (1,1,%VMcount%) do ( set name=!VMname!%%a @@ -308,7 +322,7 @@ set name=!VMname!%%a set /a num=%VMipStart%+%%a-1 set ip=!VMnetwork!.!num! echo !name!:!ip! -vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/^IPADDR=.*/IPADDR=!ip!/g' /etc/sysconfig/network-scripts/ifcfg-ens33;/etc/init.d/network restart || sudo sed -i 's/^address.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui +vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address .*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui ) goto wait diff --git a/python/supervisor_healthCheck.py b/python/supervisor_healthCheck.py index 6790bb5..9e37c02 100644 --- a/python/supervisor_healthCheck.py +++ b/python/supervisor_healthCheck.py @@ -4,10 +4,11 @@ # @Time : 2020-06-05 # @Author : lework # @Desc : 针对supervisor的应用进行健康检查 -# @Version : 1.5 +# @Version : 1.6 import os +import re import sys import time import json @@ -89,62 +90,39 @@ def get_proc_cpu(pid): return None return cpu_utilization - -def get_proc_rss(pid, cumulative=False): +def get_proc_mem(pid, type="rss"): """ 获取进程内存使用 :param pid: - :param cumulative: + :param type: :return: """ - pscommand = 'ps -orss= -p %s' - pstreecommand = 'ps ax -o "pid= ppid= rss="' - ProcInfo = namedtuple('ProcInfo', ['pid', 'ppid', 'rss']) - - def find_children(parent_pid, procs): - # 找出进程的子进程信息 - children = [] - for proc in procs: - pid, ppid, rss = proc - if ppid == parent_pid: - children.append(proc) - children.extend(find_children(pid, procs)) - return children - - if cumulative: - # 统计进程的子进程rss - _, data, _ = shell(pstreecommand) - data = data.strip() - - procs = [] - for line in 
data.splitlines(): - p_pid, p_ppid, p_rss = map(int, line.split()) - procs.append(ProcInfo(pid=p_pid, ppid=p_ppid, rss=p_rss)) - - # 计算rss - try: - parent_proc = [p for p in procs if p.pid == pid][0] - children = find_children(pid, procs) - tree = [parent_proc] + children - rss = sum(map(int, [p.rss for p in tree])) - except (ValueError, IndexError): - # 计算错误时,返回None - return None - - else: - _, data, _ = shell(pscommand % pid) - if not data: - # 未获取到数据值,或者没有此pid信息 - return None - try: - rss = data.strip() - rss = int(rss) - except ValueError: - # 获取的结果不包含数据,或者无法识别rss - return None - rss = rss / 1024 # rss 的单位是 KB, 这里返回MB单位 - return rss + smaps_file = "/proc/%s/smaps" % pid + smaps_data = b"" + if not os.path.exists(smaps_file): + print("[Error] not found %s" % smaps_file) + return None + + try: + with open(smaps_file, "rb") as f: # bytes mode: the patterns below are bytes regexes + smaps_data = f.read().strip() + except Exception as e: + print("[Error] %s" % e) + return None + + if type == "rss": + rss_re = re.compile(br"\nRss\:\s+(\d+)") + data = sum(map(int, rss_re.findall(smaps_data))) + elif type == "pss": + pss_re = re.compile(br"\nPss\:\s+(\d+)") + data = sum(map(int, pss_re.findall(smaps_data))) + elif type == "uss": + private_re = re.compile(br"\nPrivate.*:\s+(\d+)") + data = sum(map(int, private_re.findall(smaps_data))) + + data = data / 1024 # smaps reports kB; convert to MB + return data class WorkerThread(threading.Thread): @@ -188,6 +166,7 @@ class HealthCheck(object): if 'config' in config: self.mail_config = config['config'].get('mail') self.wechat_config = config['config'].get('wechat') + self.dingding_config = config['config'].get('dingding') self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url) self.supervisord_user = config['config'].get('supervisordUser', None) self.supervisord_pass = config['config'].get('supervisordPass', None) @@ -196,7 +175,7 @@ class HealthCheck(object): self.program_config = config # 只保留通知action - self.notice_action = ['email', 'wechat'] + 
self.notice_action = ['email', 'wechat', 'dingding'] self.periodSeconds = 5 self.failureThreshold = 3 @@ -204,8 +183,8 @@ class HealthCheck(object): self.initialDelaySeconds = 1 self.sendResolved = False - self.max_rss = 1024 - self.cumulative = False + self.mem_type = 'rss' + self.max_mem = 1024 self.max_cpu = 90 def get_supervisord_conn(self): @@ -359,7 +338,7 @@ class HealthCheck(object): check_state[program]['failure'] != 0 and check_state[program]['failure'] % ( (periodSeconds + initialDelaySeconds) * 2) == 0): action_param = { - 'config': config, + 'config': config, 'action_type': action_type, 'check_status': check_status, 'msg': check_result.get('msg', '') @@ -461,11 +440,11 @@ class HealthCheck(object): :return: dict """ program = config.get('program') - max_rss = config.get('maxRss', self.max_rss) - cumulative = config.get('cumulative', self.cumulative) + max_mem = config.get('maxMem', self.max_mem) + mem_type = config.get('memType', self.mem_type) pid_get = config.get('pidGet', 'supervisor') pid_file = config.get('pidFile', ) - check_info = 'max_rss:%sMB cumulative:%s' % (max_rss, cumulative) + check_info = 'max_mem:%sMB mem_type:%s' % (max_mem, mem_type) pid, err = self.get_pid(program, pid_get, pid_file) if pid == 0: @@ -473,13 +452,13 @@ class HealthCheck(object): return {'status': 'failure', 'msg': '[mem_check] program not starting, message: %s.' 
% err, 'info': check_info} - now_rss = get_proc_rss(pid, cumulative) - check_info = '%s now_rss:%sMB pid:%s' % (check_info, now_rss, pid) - if now_rss >= int(max_rss): - return {'status': 'failure', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss), + now_mem = get_proc_mem(pid, mem_type) + check_info = '%s now_mem:%sMB pid:%s' % (check_info, now_mem, pid) + if now_mem >= int(max_mem): + return {'status': 'failure', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem), 'info': check_info} - return {'status': 'success', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss), + return {'status': 'success', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem), 'info': check_info} def cpu_check(self, config): @@ -522,7 +501,7 @@ class HealthCheck(object): msg = args.get('msg') check_status = args.get('check_status') config = args.get('config') - + self.log(program, '[Action: %s]', action_type) action_list = action_type.split(',') @@ -544,6 +523,8 @@ class HealthCheck(object): self.action_email(program, action_type, msg, check_status) if 'wechat' in action_list and self.wechat_config: self.action_wechat(program, action_type, msg, check_status) + if 'dingding' in action_list and self.dingding_config: + self.action_dingding(program, action_type, msg, check_status) def action_supervisor_restart(self, program): """ @@ -601,19 +582,19 @@ class HealthCheck(object): self.log(program, "[Action: exec] result %s", result) return result - + def action_kill(self, program, pid): """ 杀死进程 :param program: :param pid: :return: - """ + """ result = 'success' - + if int(pid) < 3: - return 'Failed to kill %s, pid: %s '% (program, pid) - + return 'Failed to kill %s, pid: %s ' % (program, pid) + cmd = "kill -9 %s" % pid exitcode, stdout, stderr = shell(cmd) @@ -785,6 +766,51 @@ class HealthCheck(object): self.log(program, '[Action: wechat] send success') return True + def action_dingding(self, program, action_type, msg, 
check_status): + curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + + hostname = platform.node().split('.')[0] + system_platform = platform.platform() + + host = "oapi.dingtalk.com" + access_token = self.dingding_config.get('access_token') + send_url = '/robot/send?access_token={access_token}'.format(access_token=access_token) + + headers = { + 'Content-Type': 'application/json' + } + + if check_status == 'success': + title = "[%s] Health check successful" % program + else: + title = "[%s] Health check failed" % program + + send_data = {"msgtype": "markdown", + "markdown": { + "title": title, + "text": "#### 详情信息: \n> Program:%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg:%s" % ( + program, curr_dt, hostname, system_platform, msg) + } + } + + httpClient = httplib.HTTPSConnection(host, timeout=10) + try: + httpClient.request("POST", send_url, json.dumps(send_data), headers=headers) + response = httpClient.getresponse() + result = json.loads(response.read()) + if result['errcode'] != 0: + self.log(program, '[Action: dingding] send faild %s' % result) + return False + except Exception as e: + self.log(program, '[Action: dingding] send error %s' % e) + return False + finally: + if httpClient: + httpClient.close() + + self.log(program, '[Action: dingding] send success') + return True + def start(self): """ 启动检测 @@ -804,10 +830,10 @@ class HealthCheck(object): for t in threads: t.setDaemon(True) t.start() - + while 1: time.sleep(0.1) - for i,t in enumerate(threads): + for i, t in enumerate(threads): if not t.isAlive(): thread_name = t.getName() self.log('ERROR', 'Exception in %s (catch by main): %s' % (thread_name, t.get_exception())) @@ -825,6 +851,7 @@ if __name__ == '__main__': print("Exit check!") sys.exit(0) + signal.signal(signal.SIGINT, sig_handler) signal.signal(signal.SIGTERM, sig_handler) signal.signal(signal.SIGQUIT, sig_handler) @@ -850,12 +877,14 @@ config: # 脚本配置名称,请勿更 # touser: # toparty: # totag: +# dingding: # 钉钉通知配置 + 
access_token: # 内存方式监控 cat1: # supervisor中配置的program名称 type: mem # 检查类型: http,tcp,mem,cpu 默认: http - maxRss: 1024 # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024 - cumulative: True # 是否统计子进程的内存, 默认: False + maxMem: 1024 # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024 + memType: rss # 内存使用分类:rss, pss, uss 默认:rss pidGet: supervisor # 获取pid的方式: supervisor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervisor pidFile: /var/run/t.pid # 指定pid文件的路径, 只在pidGet为file的时候有用 periodSeconds: 10 # 检查的频率(以秒为单位), 默认: 5 diff --git a/shell/get_proc_mem.sh b/shell/get_proc_mem.sh new file mode 100644 index 0000000..430b495 --- /dev/null +++ b/shell/get_proc_mem.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash + + +pid=$1 +retries="${2:-0}" +wait="${3:-1}" +pid_smaps="" + + +function get_meminfo() { + [ ! -f "/proc/${pid}/smaps" ] \ + && { echo "[Error] not found $pid smaps file."; echo "Usage: bash $0 Pid Retries Wait, like: bash $0 1234 100 5"; exit 1; } \ + || pid_smaps=$(cat /proc/${pid}/smaps) + + mem_info=$(cat /proc/meminfo) + + mem_total=$(printf "%s" "${mem_info}"| awk '/^MemTotal:/ {print $2}') + mem_free=$(printf "%s" "${mem_info}"| awk '/^MemFree:/ {print $2}') + mem_available=$(printf "%s" "${mem_info}"| awk '/^MemAvailable:/ {print $2}') + size=$(printf "%s" "${pid_smaps}" | awk '/^Size/{sum += $2}END{print sum+0}') + rss=$(printf "%s" "${pid_smaps}" | awk '/^Rss/{sum += $2}END{print sum+0}') + pss=$(printf "%s" "${pid_smaps}" | awk '/^Pss/{sum += $2}END{print sum+0}') + + shared_clean=$(printf "%s" "${pid_smaps}" | awk '/^Shared_Clean/{sum += $2}END{print sum+0}') + shared_dirty=$(printf "%s" "${pid_smaps}" | awk '/^Shared_Dirty/{sum += $2}END{print sum+0}') + private_clean=$(printf "%s" "${pid_smaps}" | awk '/^Private_Clean/{sum += $2}END{print sum+0}') + private_dirty=$(printf "%s" "${pid_smaps}" | awk '/^Private_Dirty/{sum += $2}END{print sum+0}') + swap=$(printf "%s" "${pid_smaps}" | awk '/^Swap:/{sum += $2}END{print sum+0}') + swap_pss=$(printf "%s" "${pid_smaps}" | awk '/^SwapPss/{sum += $2}END{print sum+0}') 
+} + +count=0 +while [ $count -lt $retries ] ; do + get_meminfo + echo "Date: $(date +'%Y-%m-%d %T') MemTotal: $((mem_total/1024))MB MemFree: $((mem_free/1024))MB MemAvailable: $((mem_available/1024))MB RSS: $((${rss}/1024))MB PSS: $((${pss}/1024))MB USS: $(( (${private_clean} + ${private_dirty}) /1024 ))MB" + sleep $wait + count=$(($count + 1)) +done + + +get_meminfo + +cat << EOF + +# OS meminfo +MemTotal:内存总数 +MemFree:空闲内存数 +MemAvailable:可用内存数,包括cache/buffer、slab + +# Process smaps +Size:表示该映射区域在虚拟内存空间中的大小。 +Rss: 表示该映射区域当前在物理内存中占用了多少空间 + Rss=Shared_Clean+Shared_Dirty+Private_Clean+Private_Dirty +Pss: 该虚拟内存区域平摊计算后使用的物理内存大小(有些内存会和其他进程共享,例如mmap进来的) + 实际上包含下面private_clean+private_dirty,和按比例均分的shared_clean、shared_dirty。 +Uss: Unique Set Size 进程独自占用的物理内存(不包含共享库占用的内存) + USS=Private_Clean+Private_Dirty +Shared_Clean: 和其他进程共享的未被改写的page的大小 +Shared_Dirty: 和其他进程共享的被改写的page的大小 +Private_Clean: 未被改写的私有页面的大小。 +Private_Dirty: 已被改写的私有页面的大小。 +Swap: 存在于交换分区的数据大小(如果物理内存有限,可能存在一部分在主存一部分在交换分区) +SwapPss: 计算逻辑就跟pss一样,只不过针对的是交换分区的内存。 + +Pid: ${pid} +Cmd: $(tr -d '\0' < /proc/${pid}/cmdline | cut -c1-80) +User: $(ps -o user= -p ${pid}) +Threads: $(awk '/Threads:/ {print $2}' /proc/${pid}/status) + +File: /proc/${pid}/smaps + +# Os meminfo +MemTotal: ${mem_total} KB +MemFree: ${mem_free} KB +MemAvailable: ${mem_available} KB + +# Process smaps +Size: ${size} KB +RSS: ${rss} kB +PSS: ${pss} kB +Shared_Clean: ${shared_clean} kB +Shared_Dirty: ${shared_dirty} kB +Private_Clean: ${private_clean} kB +Private_Dirty: ${private_dirty} kB +Swap: ${swap} kB +SwapPss: ${swap_pss} kB + +USS: ${private_clean} + ${private_dirty} = $(( ${private_clean} + ${private_dirty} )) kB +EOF diff --git a/shell/keystore.sh b/shell/keystore.sh new file mode 100644 index 0000000..f7cbb5e --- /dev/null +++ b/shell/keystore.sh @@ -0,0 +1,75 @@ +#!/bin/bash + +# Set environment variables +BASE_DIR=$(pwd) # change this to suit your environment +CERT_OUTPUT_PATH="$BASE_DIR/certificates" +PASSWORD=test1234 +KEY_STORE="$CERT_OUTPUT_PATH/server.keystore.jks" 
+TRUST_STORE="$CERT_OUTPUT_PATH/server.truststore.jks" +CLIENT_KEY_STORE="$CERT_OUTPUT_PATH/client.keystore.jks" +CLIENT_TRUST_STORE="$CERT_OUTPUT_PATH/client.truststore.jks" +KEY_PASSWORD=$PASSWORD +STORE_PASSWORD=$PASSWORD +TRUST_KEY_PASSWORD=$PASSWORD +TRUST_STORE_PASSWORD=$PASSWORD +CERT_AUTH_FILE="$CERT_OUTPUT_PATH/ca-cert" +DAYS_VALID=3650 +DNAME="CN=Test, OU=YourDept, O=YourCompany, L=Shanghai, ST=Shanghai, C=CN" +SUBJ="/C=CN/ST=Shanghai/L=Shanghai/O=YourCompany/OU=YourDept/CN=Test" + +mkdir -p "$CERT_OUTPUT_PATH" + +echo "1. 产生 key 和证书......" +keytool -keystore $KEY_STORE -alias kafka-server -validity $DAYS_VALID -genkey -keyalg RSA \ +-storepass $STORE_PASSWORD -keypass $KEY_PASSWORD -dname "$DNAME" + +keytool -keystore $CLIENT_KEY_STORE -alias kafka-client -validity $DAYS_VALID -genkey -keyalg RSA \ +-storepass $STORE_PASSWORD -keypass $KEY_PASSWORD -dname "$DNAME" + +echo "2. 创建 CA......" +openssl req -new -x509 -keyout $CERT_OUTPUT_PATH/ca-key -out "$CERT_AUTH_FILE" -days "$DAYS_VALID" \ +-passin pass:"$PASSWORD" -passout pass:"$PASSWORD" \ +-subj "$SUBJ" + +echo "3. 添加 CA 文件到 broker truststore......" +keytool -keystore "$TRUST_STORE" -alias CARoot \ +-importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt + +echo "4. 添加 CA 文件到 client truststore......" +keytool -keystore "$CLIENT_TRUST_STORE" -alias CARoot \ +-importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt + +echo "5. 从 keystore 中导出集群证书......" +keytool -keystore "$KEY_STORE" -alias kafka-server -certreq -file "$CERT_OUTPUT_PATH/server-cert-file" \ +-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt + +keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -certreq -file "$CERT_OUTPUT_PATH/client-cert-file" \ +-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt + +echo "6. 使用 CA 签发证书......" 
+openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey $CERT_OUTPUT_PATH/ca-key -in "$CERT_OUTPUT_PATH/server-cert-file" \ +-out "$CERT_OUTPUT_PATH/server-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD" + +openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey $CERT_OUTPUT_PATH/ca-key -in "$CERT_OUTPUT_PATH/client-cert-file" \ +-out "$CERT_OUTPUT_PATH/client-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD" + +echo "7. 导入 CA 文件到 keystore......" +keytool -keystore "$KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \ + -keypass "$KEY_PASSWORD" -noprompt + +keytool -keystore "$CLIENT_KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \ + -keypass "$KEY_PASSWORD" -noprompt + +echo "8. 导入已签发证书到 keystore......" +keytool -keystore "$KEY_STORE" -alias kafka-server -import -file "$CERT_OUTPUT_PATH/server-cert-signed" \ + -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt + +keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -import -file "$CERT_OUTPUT_PATH/client-cert-signed" \ + -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt + +echo "9. 删除临时文件......" +rm "$CERT_OUTPUT_PATH/ca-cert.srl" +rm "$CERT_OUTPUT_PATH/server-cert-signed" +rm "$CERT_OUTPUT_PATH/client-cert-signed" +rm "$CERT_OUTPUT_PATH/server-cert-file" +rm "$CERT_OUTPUT_PATH/client-cert-file" \ No newline at end of file diff --git a/shell/kube-logging.sh b/shell/kube-logging.sh new file mode 100644 index 0000000..8a175b3 --- /dev/null +++ b/shell/kube-logging.sh @@ -0,0 +1,171 @@ +#!/usr/bin/env bash + +# Copyright 2014 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Controls verbosity of the script output and logging. +KUBE_VERBOSE="${KUBE_VERBOSE:-2}" + +# Handler for when we exit automatically on an error. +# Borrowed from https://gist.github.com/ahendrix/7030300 +kube::log::errexit() { + local err="${PIPESTATUS[*]}" + + # If the shell we are in doesn't have errexit set (common in subshells) then + # don't dump stacks. + set +o | grep -qe "-o errexit" || return + + set +o xtrace + local code="${1:-1}" + # Print out the stack trace described by $function_stack + if [ ${#FUNCNAME[@]} -gt 2 ] + then + kube::log::error "Call tree:" + for ((i=1;i<${#FUNCNAME[@]}-1;i++)) + do + kube::log::error " ${i}: ${BASH_SOURCE[${i}+1]}:${BASH_LINENO[${i}]} ${FUNCNAME[${i}]}(...)" + done + fi + kube::log::error_exit "Error in ${BASH_SOURCE[1]}:${BASH_LINENO[0]}. '${BASH_COMMAND}' exited with status ${err}" "${1:-1}" 1 +} + +kube::log::install_errexit() { + # trap ERR to provide an error handler whenever a command exits nonzero this + # is a more verbose version of set -o errexit + trap 'kube::log::errexit' ERR + + # setting errtrace allows our ERR trap handler to be propagated to functions, + # expansions and subshells + set -o errtrace +} + +# Print out the stack trace +# +# Args: +# $1 The number of stack frames to skip when printing. 
+kube::log::stack() { + local stack_skip=${1:-0} + stack_skip=$((stack_skip + 1)) + if [[ ${#FUNCNAME[@]} -gt ${stack_skip} ]]; then + echo "Call stack:" >&2 + local i + for ((i=1 ; i <= ${#FUNCNAME[@]} - stack_skip ; i++)) + do + local frame_no=$((i - 1 + stack_skip)) + local source_file=${BASH_SOURCE[${frame_no}]} + local source_lineno=${BASH_LINENO[$((frame_no - 1))]} + local funcname=${FUNCNAME[${frame_no}]} + echo " ${i}: ${source_file}:${source_lineno} ${funcname}(...)" >&2 + done + fi +} + +# Log an error and exit. +# Args: +# $1 Message to log with the error +# $2 The error code to return +# $3 The number of stack frames to skip when printing. +kube::log::error_exit() { + local message="${1:-}" + local code="${2:-1}" + local stack_skip="${3:-0}" + stack_skip=$((stack_skip + 1)) + + if [[ ${KUBE_VERBOSE} -ge 4 ]]; then + local source_file=${BASH_SOURCE[${stack_skip}]} + local source_line=${BASH_LINENO[$((stack_skip - 1))]} + echo "!!! Error in ${source_file}:${source_line}" >&2 + [[ -z ${1-} ]] || { + echo " ${1}" >&2 + } + + kube::log::stack ${stack_skip} + + echo "Exiting with status ${code}" >&2 + fi + + exit "${code}" +} + +# Log an error but keep going. Don't dump the stack or exit. +kube::log::error() { + timestamp=$(date +"[%m%d %H:%M:%S]") + echo "!!! ${timestamp} ${1-}" >&2 + shift + for message; do + echo " ${message}" >&2 + done +} + +# Print an usage message to stderr. The arguments are printed directly. 
+kube::log::usage() { + echo >&2 + local message + for message; do + echo "${message}" >&2 + done + echo >&2 +} + +kube::log::usage_from_stdin() { + local messages=() + while read -r line; do + messages+=("${line}") + done + + kube::log::usage "${messages[@]}" +} + +# Print out some info that isn't a top level status line +kube::log::info() { + local V="${V:-0}" + if [[ ${KUBE_VERBOSE} < ${V} ]]; then + return + fi + + for message; do + echo "${message}" + done +} + +# Just like kube::log::info, but no \n, so you can make a progress bar +kube::log::progress() { + for message; do + echo -e -n "${message}" + done +} + +kube::log::info_from_stdin() { + local messages=() + while read -r line; do + messages+=("${line}") + done + + kube::log::info "${messages[@]}" +} + +# Print a status line. Formatted to show up in a stream of output. +kube::log::status() { + local V="${V:-0}" + if [[ ${KUBE_VERBOSE} < ${V} ]]; then + return + fi + + timestamp=$(date +"[%m%d %H:%M:%S]") + echo "+++ ${timestamp} ${1}" + shift + for message; do + echo " ${message}" + done +} diff --git a/shell/linux采集/caiji.sh b/shell/linux采集/caiji.sh new file mode 100644 index 0000000..2d41e8a --- /dev/null +++ b/shell/linux采集/caiji.sh @@ -0,0 +1,83 @@ +#!/bin/bash +# create log file folder +test -e /var/log/ecsanalyse || mkdir /var/log/ecsanalyse; +datetime=$(date +%Y%m%d-%H-%M-%S-%N) +log_filename=ecs_analyse_${datetime}.log +log_file=/var/log/ecsanalyse/$log_filename + +# script start------------ +echo "##*problem_total_analyse" >> $log_file 2>&1 +echo "###dos-ff" >> $log_file 2>&1 +file /etc/passwd >> $log_file 2>&1 +file /etc/shadow >> $log_file 2>&1 +file /etc/pam.d/* >> $log_file 2>&1 +echo "###limits" >> $log_file 2>&1 +cat /etc/security/limits.conf | grep -Ev "^$|[#;]" >> $log_file 2>&1 +echo "###virtio-net-multiqueue" >> $log_file 2>&1 +for i in $(ip link | grep -E "^[0-9]+: .*:" -o | cut -d ":" -f 2 | grep -v lo); do echo $i >> $log_file 2>&1; ethtool -l $i 2>/dev/null | grep Combined >> 
$log_file 2>&1; done +echo "###passwd" >> $log_file 2>&1 +cat /etc/passwd >> $log_file 2>&1 +echo "###cpu-top-5" >> $log_file 2>&1 +top -b -n 1 | grep "%Cpu(s):" >> $log_file 2>&1 +ps -eT -o%cpu,pid,tid,ppid,comm | grep -v CPU | sort -n -r | head -5 >> $log_file 2>&1 +echo "###ssh-perm" >> $log_file 2>&1 +echo "***centos" >> $log_file 2>&1 +ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /var/empty/* /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1 +echo "***ubuntu" >> $log_file 2>&1 +ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1 +echo "***debian" >> $log_file 2>&1 +ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1 +echo "###blkid" >> $log_file 2>&1 +blkid >> $log_file 2>&1 +echo "###osinfo" >> $log_file 2>&1 +if test -f "/etc/os-release"; then +cat /etc/os-release | egrep "^NAME=|^VERSION=" >> $log_file 2>&1 +else +echo "no os-release" >> $log_file 2>&1 +echo "no os-release" >> $log_file 2>&1 +fi +if test -f "/etc/redhat-release" ; then +echo "redhat-release:" $(cat /etc/redhat-release) >> $log_file 2>&1 +else +echo "no redhat-release" >> $log_file 2>&1 +fi +echo "uname: " $(uname -a) >> $log_file 2>&1 +echo "uname short\: " $(uname -r) >> $log_file 2>&1 +echo "###softlink" >> $log_file 2>&1 +ls -l / | grep "\->" >> $log_file 2>&1 +echo "###iptables" >> $log_file 2>&1 +echo "***centos-5" >> $log_file 2>&1 +service iptables status >> $log_file 2>&1 +echo "***centos-6" >> $log_file 2>&1 +service iptables status >> $log_file 2>&1 +echo "***centos-7" >> $log_file 2>&1 +firewall-cmd --state >> $log_file 2>&1 +echo "***ubuntu" >> $log_file 2>&1 +ufw status >> $log_file 2>&1 +echo "***default" >> $log_file 2>&1 +iptables -L >> $log_file 2>&1 +echo "###sysctl" >> $log_file 2>&1 +cat /etc/sysctl.conf | grep nr_hugepages >> $log_file 2>&1 +echo -e "net.ipv4.tcp_tw_recycle=\c" >> $log_file 2>&1 && cat 
/proc/sys/net/ipv4/tcp_tw_recycle >> $log_file 2>&1 +echo -e "net.ipv4.tcp_timestamps=\c" >> $log_file 2>&1 && cat /proc/sys/net/ipv4/tcp_timestamps >> $log_file 2>&1 +echo -e "fs.nr_open=\c" >> $log_file 2>&1 && cat /proc/sys/fs/nr_open >> $log_file 2>&1 +echo -e "net.ipv4.tcp_sack=\c" >> $log_file 2>&1 && cat /proc/sys/net/ipv4/tcp_sack >> $log_file 2>&1 +echo "###fstab" >> $log_file 2>&1 +cat /etc/fstab | grep -Ev "^$|[#;]" >> $log_file 2>&1 +echo "###dmesg" >> $log_file 2>&1 +cat /proc/uptime >> $log_file 2>&1 +dmesg | grep "invoked oom-killer" | tail -n 1 >> $log_file 2>&1 +echo "###port-usage" >> $log_file 2>&1 +netstat -tapn | grep LISTEN | grep -E 'sshd' >> $log_file 2>&1 +echo "###selinux" >> $log_file 2>&1 +echo "***default" >> $log_file 2>&1 +getenforce >> $log_file 2>&1 +echo "***ubuntu" >> $log_file 2>&1 +service selinux status > /dev/null; echo $? >> $log_file 2>&1 +echo "***debian" >> $log_file 2>&1 +service selinux status > /dev/null; echo $? >> $log_file 2>&1 +echo "###meminfo" >> $log_file 2>&1 +cat /proc/meminfo | grep Hugepagesize >> $log_file 2>&1 +cat /proc/meminfo | grep MemTotal >> $log_file 2>&1 +# script end------------ + diff --git a/shell/linux采集/oos_noak.sh b/shell/linux采集/oos_noak.sh new file mode 100644 index 0000000..6a71660 --- /dev/null +++ b/shell/linux采集/oos_noak.sh @@ -0,0 +1,354 @@ +#!/bin/bash +# create log file folder + +test -e /var/log/ecsanalyse || mkdir -p /var/log/ecsanalyse; +datetime=$(date +%Y%m%d-%H-%M-%S-%N) +log_filename=ecs_analyse_${datetime}.log +log_file=/var/log/ecsanalyse/$log_filename + +#set var +regionId=$(curl -s http://100.100.100.200/latest/meta-data/region-id) +mount_dir=/mnt +disk_vdb=$(blkid |grep -v vda | awk -F : '{print $1}') +fstab=/mnt/etc/fstab +grub_f=/mnt/boot/grub/grub.cfg +grub2_f=/mnt/boot/grub2/grub.cfg +selinux_config_file=/mnt/etc/selinux/config +sysctl_config_file=/mnt/etc/sysctl.conf +rc_local_file=/mnt/etc/rc.local +sshdconfig_file=/mnt/etc/ssh/sshd_config +passfile=/mnt/etc/passwd 
+# NOTE(review): log_file, disk_vdb, mount_dir, passfile, fstab, grub_f, grub2_f,
+# datetime, selinux_config_file, sysctl_config_file, rc_local_file and
+# sshdconfig_file are used below but assigned outside this excerpt.
+passbakfile="/mnt/etc/passwd-"
+shadowfile=/mnt/etc/shadow
+shadowbakfile="/mnt/etc/shadow-"
+# Accounts that check_system_username expects to find in ${passfile}.
+system_user=(root daemon nobody dbus polkitd sshd messagebus)
+# Paths that check_sys_important_dir expects to exist.
+# NOTE(review): /mnt/var is listed twice.
+system_dir=(/mnt/bin /mnt/sbin /mnt/usr/bin /mnt/usr/sbin /mnt/lib /mnt/lib64 /mnt/usr/lib /mnt/usr/lib64 /mnt/etc /mnt/boot /mnt/var /mnt/var)
+###change new instance root passwd to rand passwd
+# chpasswd is called without a chroot option, so this changes root's password
+# on the system running the script, not the one mounted under /mnt.
+# NOTE(review): the new password is written in clear text to $log_file, which
+# upload_log later copies to OSS.
+newpw=$(openssl rand -base64 10)
+echo root:${newpw}|chpasswd
+echo "The new instance passwd is ${newpw}" >>$log_file 2>&1
+
+###define upload log function
+# Copy $log_file to oss://public-beijing/ooslogs/ with ossutil64, fetching the
+# tool into /var/log/ecsanalyse when it is not there yet.
+# `A || B && C` groups as `(A || B) && C`, so chmod runs whenever the tool
+# already exists or the wget succeeded.
+# NOTE(review): your_ak / your_sk look like placeholders for real credentials.
+function upload_log(){
+    cd /var/log/ecsanalyse;
+    test -e ossutil64 || wget http://gosspublic.alicdn.com/ossutil/1.6.10/ossutil64 && chmod 755 /var/log/ecsanalyse/ossutil64;
+    /var/log/ecsanalyse/ossutil64 -i your_ak -k your_sk -e oss-cn-beijing.aliyuncs.com cp -f $log_file oss://public-beijing/ooslogs/
+
+}
+
+###yuanyuan checkdisk and mount disk
+# Stop the whole script (after uploading the log) when $disk_vdb is empty.
+function check_vdb_is_exist()
+{
+if [ ! -n "$disk_vdb" ]
+then
+    echo "Disk can not find in this system ,exit " >>$log_file 2>&1
+    upload_log
+    exit
+fi
+}
+
+
+# Create $mount_dir if missing, unmount whatever is mounted there, then mount
+# $disk_vdb on it. The return status is that of the final mount command.
+function mount_disk()
+{
+    test -e $mount_dir || mkdir -p $mount_dir
+    if mountpoint -q $mount_dir;then umount $mount_dir;fi
+    mount $disk_vdb $mount_dir >>$log_file 2>&1
+}
+
+# Log the fifth column of the last `df -h` / `df -i` line for $mount_dir
+# (the use-percentage column in df's default layout).
+function get_disk_usage()
+{
+    disk_usage=$(df -h $mount_dir | tail -n 1 | awk '{print $5}')
+    disk_inode_usage=$(df -i $mount_dir | tail -n 1| awk '{print $5}')
+    echo "disk space usage: $disk_usage" >>$log_file 2>&1
+    echo "disk inode usage: $disk_inode_usage" >>$log_file 2>&1
+}
+
+# Unmount $mount_dir if it is listed in /proc/mounts, then check the file
+# system on $disk_vdb: fsck -y for ext (skipped when tune2fs reports a clean
+# state), xfs_repair for everything else.
+function repair_fs()
+{
+    if grep -qs "$mount_dir" /proc/mounts
+    then
+        echo "Check MNT dir is mounted,umount it" >>$log_file 2>&1
+        umount $mount_dir
+    fi
+
+    # fs_type holds every blkid line that does not contain "vda"; the test
+    # below only asks whether "ext" appears anywhere in that text.
+    fs_type=$(blkid | grep -v vda )
+    if [[ "$fs_type" =~ "ext" ]]
+    then
+        if [[ "$(tune2fs -l $disk_vdb|grep state)" =~ "clean" ]]
+        then
+            echo "Extfs is clean ,dont need fsck" >>$log_file 2>&1
+        else
+            echo "This line means that the file system need fsck,its ext" >>$log_file 2>&1
+            fsck -y $disk_vdb >>$log_file 2>&1
+        fi
+    else
+        # Anything that is not ext is assumed to be xfs.
+        echo "This line means that the file system is xfs " >>$log_file 2>&1
+        xfs_repair $disk_vdb >>$log_file 2>&1
+    fi
+}
+
+# Disk preparation: verify the disk variable, repair, mount, and record usage
+# only when the mount succeeded.
+function main(){
+check_vdb_is_exist
+repair_fs
+mount_disk && get_disk_usage
+}
+
+###muyuan Check the existence of system important directories
+# Log every entry of system_dir that does not exist; nothing is created.
+function check_sys_important_dir(){
+    for important_dir in ${system_dir[@]}
+    do
+        if [ ! -e $important_dir ]
+        then
+            echo "This sys dir or link $important_dir does not exist " >> $log_file 2>&1
+        fi
+    done
+    echo "Check the existence of system important directories...done" >> $log_file 2>&1
+}
+###install dos2unix
+# Run dos2unix on ${passfile} and ${shadowfile}, installing it with yum first
+# when `which` cannot find it. Both branches end with the same two conversions.
+function install_dos2unix(){
+    if ! which dos2unix ;then
+        yum -y install dos2unix >> $log_file 2>&1
+        sleep 5s
+        dos2unix ${passfile} >> $log_file 2>&1
+        dos2unix ${shadowfile} >> $log_file 2>&1
+    else
+        dos2unix ${passfile} >> $log_file 2>&1
+        dos2unix ${shadowfile} >> $log_file 2>&1
+    fi
+}
+###check / 777
+# For each /mnt entry whose `ls -l` line contains "rwxrwx" (symlinks and lines
+# matching "tmp" excluded): directories get chmod -R 755, anything else 644.
+# NOTE(review): "rwxrwx" also matches modes such as rwxrwxr-x, not only 777.
+function check_rootdir_permission(){
+    for dirfile in $( ls -l /mnt |grep rwxrwx|egrep -v "\->|tmp"|awk '{print "/mnt/"$NF}');
+    do
+        if [ -d "$dirfile" ]
+        then
+            echo "This dir ${dirfile} permission is 777,chmod to 755" >> $log_file 2>&1
+            chmod -R 755 ${dirfile} >> $log_file 2>&1
+        else
+            echo "This file ${dirfile} permission is 777,chmod to 644" >> $log_file 2>&1
+            chmod 644 ${dirfile} >> $log_file 2>&1
+        fi
+    done
+    echo "Check /mnt dir permission 777 ...done " >> $log_file 2>&1
+}
+###check ssh 777
+# For each /mnt/etc/ssh entry whose `ls -l` line contains "rwxrwx": names with
+# a .pub extension, ending in "_config" or ending in "moduli" get 644; every
+# other file gets 600.
+function check_ssh_permission(){
+    for sshfile in $(ls -l /mnt/etc/ssh | grep rwxrwx | awk '{print "/mnt/etc/ssh/"$NF}')
+    do
+        # The trailing "x" on both sides anchors the =~ matches to the end of
+        # the file name.
+        if [ "${sshfile##*.}"x = "pub"x ] || [[ "${sshfile}"x =~ "_config"x ]] || [[ "${sshfile}"x =~ "moduli"x ]]
+        then
+            echo "Change file ${sshfile} permission to 644" >> $log_file 2>&1
+            chmod 644 ${sshfile} >> $log_file 2>&1
+        else
+            echo "Change file ${sshfile} permission to 600" >> $log_file 2>&1
+            chmod 600 ${sshfile} >> $log_file 2>&1
+        fi
+    done
+    echo "Check /mnt/etc/ssh dir permission 777 ...done " >> $log_file 2>&1
+}
+###change empty_sshd permission
+# Set /mnt/var/empty/sshd to mode 711 when that directory exists.
+function check_empty_sshd(){
+    if [ -d /mnt/var/empty/sshd ];then
+        echo "change empty_sshd permission to 711" >> $log_file 2>&1
+        chmod 711 /mnt/var/empty/sshd;else
+        echo "This system does not have dir /var/empty/sshd" >> $log_file 2>&1
+    fi
+}
+###check selinux
+# Comment out every line starting with "SELINUX=" and append a
+# "SELINUX=disabled" line after each line that starts with "#SELINUX=".
+function disable_selinux(){
+    if [ -e ${selinux_config_file} ];then
+        echo "anon selinux and add a new disable line " >>$log_file 2>&1
+        sed -i 's/^SELINUX=/#SELINUX=/g' ${selinux_config_file}
+        sed -i '/^#SELINUX=/a\SELINUX=disabled' ${selinux_config_file}
+    else
+        echo "this os does not have selinux config file" >>$log_file 2>&1
+    fi
+
+}
+###check sysctl
+# Comment out the whole sysctl file when it has 50 or more lines; otherwise
+# only the vm.nr_hugepages and vm.min_free_kbytes settings.
+# NOTE(review): the threshold tested is 50 but the log message says 100.
+function disable_sysctl(){
+    if [[ ${sysctl_line_count} -ge 50 ]]; then
+        echo "sysctl.conf line count ge 100,so anon all line" >>$log_file 2>&1
+        sed -i 's/^/#/g' ${sysctl_config_file}
+    else
+        echo "check and anon nr_hugepages" >>$log_file 2>&1
+        sed -i 's/^vm.nr_hugepages/#vm.nr_hugepages/g' ${sysctl_config_file}
+        echo "check and anon min_free_kbytes" >>$log_file 2>&1
+        sed -i 's/^vm.min_free_kbytes/#vm.min_free_kbytes/g' ${sysctl_config_file}
+    fi
+}
+###check rc.local
+# Prefix every line of ${rc_local_file} with "#" when the file exists.
+function disable_rc_local(){
+    if [ -e ${rc_local_file} ];then
+        echo "anon rc_local line" >>$log_file 2>&1
+        sed -i 's/^/#/g' ${rc_local_file}
+    else
+        echo "This instance does not have rc_local files" >>$log_file 2>&1
+    fi
+}
+
+###check system release
+# Log the distribution name: the lower-cased ID= value from /mnt/etc/os-release
+# or, when that file is missing, the lower-cased first word of /mnt/etc/issue.net.
+function check_os_type(){
+    if [ -e /mnt/etc/os-release ];then
+        os_type_1=$(grep -i "^ID=" /mnt/etc/os-release |awk -F "=" '{print tolower($NF)}'|tr -d "\"")
+        echo "This instance os_type is ${os_type_1}" >> $log_file 2>&1
+    else
+        os_type_2=$(head -n 1 /mnt/etc/issue.net |awk '{print tolower($1)}')
+        echo "may be,this instance is ${os_type_2} " >> $log_file 2>&1
+    fi
+}
+###Check passwd
+# When ${passfile} exists and is non-empty, make sure each system_user account
+# has a line in it, restoring missing ones from ${passbakfile}. When it is
+# missing or empty, replace it with ${passbakfile} if that file has content.
+function check_system_username(){
+    if [ -e "${passfile}" ] && [ -s "${passfile}" ] ;then
+        echo "passwd file is not zero,start check username" >> $log_file 2>&1
+        echo -e "##########\n#please note \n#centos & redhat does not have messagebus user \n#ubuntu & debian does not have dbus & polkitd user\n##########" >> $log_file 2>&1
+        for c_user in ${system_user[@]}
+        do
+            # NOTE(review): "^${c_user}" is a prefix match, so e.g. "root"
+            # also matches an account named "rootuser".
+            if ! egrep -qs "^${c_user}" ${passfile} ;then
+                echo "This account ${c_user} not in ${passfile}" >> $log_file 2>&1
+                grep ^${c_user} ${passbakfile} >> ${passfile}
+                # $? is grep's status: 0 means a line was found and appended.
+                if [ $? -eq 0 ];then
+                    echo "${passbakfile} has this account ${c_user},Restore the account to ${passfile}" >> $log_file 2>&1
+                else
+                    echo "${passbakfile} does not have this account ${c_user},can not restore from the bakfile" >> $log_file 2>&1
+                fi
+            else
+                echo "This account ${c_user} is ok" >> $log_file 2>&1
+            fi
+        done
+    elif [ -e "${passbakfile}" ] && [ -s "${passbakfile}" ]; then
+        echo "File ${passfile} size is zero or not exists,bcakup to passwd.bak and use ${passbakfile} restore" >> $log_file 2>&1
+        mv ${passfile} ${passfile}.bak
+        cp ${passbakfile} ${passfile}
+    else
+        echo "The ${passbakfile} file not exists or size is zero" >> $log_file 2>&1
+    fi
+}
+###check shadow
+# Comment out every line starting with "root" in ${passfile} and ${shadowfile},
+# then append a fresh root entry to each. The appended shadow entry has an
+# empty password field.
+function check_root_pass_shadow(){
+    echo "anon root in passwd & shadow and add new root no passwd " >> $log_file 2>&1
+    sed -i 's/^root/#root/g' ${passfile}
+    sed -i 's/^root/#root/g' ${shadowfile}
+    echo "root:x:0:0:root:/root:/bin/bash" >> ${passfile}
+    echo "root::18340:0:99999:7:::" >> ${shadowfile}
+
+}
+###shibin check uuid fstab
+# Comment out the fstab lines selected by the filter below: non-empty, no "#",
+# no "swap", and not matched by `grep -w /` (presumably everything except the
+# root mount — verify against a real fstab).
+function annotation_datadisk(){
+    base_l="`cat ${fstab} | sed '/^$/d' |grep -v "#" | grep -v swap | grep -w -v /`"
+    echo "In fun [annotation_datadisk]: " >> $log_file 2>&1
+    echo "${base_l}" | while read line; do
+        echo "blanking [ $line ]" >> $log_file 2>&1
+        sed -i "s#$line#\#$line#g" $fstab
+    done
+}
+
+# Replace the fstab line(s) matched by `grep -w /` with a fixed /dev/vda1 ext4
+# root entry; does nothing when no line matches.
+function replace_sysdisk_by_diskname(){
+    echo "In fun [replace_sysdisk_by_diskname]: " >> $log_file 2>&1
+    sys_disk="`cat $fstab | sed '/^$/d'| grep -w /`"
+    line_r="/dev/vda1 / ext4 defaults 1 1"
+    if [ "$sys_disk" != "" ]; then
+        echo "Setting [ $sys_disk ] as [ $line_r ]" >> $log_file 2>&1
+        sed -i "s#$sys_disk#$line_r#g" $fstab
+    fi
+}
+# Back up the grub config (${grub2_f} if present, else ${grub_f}) and replace
+# the vdb identifier in it with /dev/vda1. The identifier is the second field
+# of the blkid line containing "vdb", with double quotes removed.
+function reset_grub_by_diskname(){
+    echo "In fun [reset_grub_by_diskname]: " >> $log_file 2>&1
+    uuid="`blkid |grep vdb| awk '{print $2}'| sed 's/\"//g'`"
+    if [ -f ${grub2_f} ]; then
+        echo "cp ${grub2_f} ${grub2_f}_${datetime}" >> $log_file 2>&1
+        cp ${grub2_f} ${grub2_f}_${datetime}
+        sed -i "s#$uuid#/dev/vda1#g" ${grub2_f}
+    elif [ -f ${grub_f} ]; then
+        cp ${grub_f} ${grub_f}_${datetime}
+        sed -i "s#$uuid#/dev/vda1#g" ${grub_f}
+    fi
+
+}
+# Copy ${fstab} to ${fstab}_${datetime} before it is edited.
+function backup_fstab(){
+    echo "In fun [backup_fstab]" >> $log_file 2>&1
+    echo " cp ${fstab} ${fstab}_${datetime}" >> $log_file 2>&1
+    cp ${fstab} ${fstab}_${datetime}
+}
+###nvshen check sshdcofig
+# Edit ${sshdconfig_file}: force "PermitRootLogin yes", comment out the
+# AllowUsers/AllowGroups/DenyUsers/DenyGroups lines (their values are dropped),
+# and set LoginGraceTime to 100.
+function check_sshdconfig()
+{
+    echo "check PermitRootLogin and change to yes" >>$log_file 2>&1
+    sed -i 's/^PermitRootLogin.*$/PermitRootLogin yes/' ${sshdconfig_file}
+    echo "check AllowUsers AllowGroups DenyUsers DenyGroups and Comment out" >>$log_file 2>&1
+    sed -i 's/^AllowUsers.*$/#AllowUsers/' ${sshdconfig_file}
+    sed -i 's/^AllowGroups.*$/#AllowGroups/' ${sshdconfig_file}
+    sed -i 's/^DenyUsers.*$/#DenyUsers/' ${sshdconfig_file}
+    sed -i 's/^DenyGroups.*$/#DenyGroups/' ${sshdconfig_file}
+    echo "check LoginGraceTime and cahnge to 100s" >>$log_file 2>&1
+    sed -i 's/^LoginGraceTime.*$/LoginGraceTime 100/' ${sshdconfig_file}
+}
+
+###start check
+# Top-level sequence: prepare and mount the disk, then run each repair step
+# only when /mnt is a mount point.
+echo "mount bad sysdisk to mnt" >> $log_file 2>&1
+main
+###set var sysctl line count
+# Must run after main: the file is read from the freshly mounted disk.
+sysctl_line_count=$(awk 'END{print NR}' /mnt/etc/sysctl.conf)
+echo "Check sysdisk mount ready" >> $log_file 2>&1
+mountpoint /mnt >> $log_file 2>&1
+if [ $? -ne 0 ]
+    then
+        echo "The /mnt dir does not ready mount" >> $log_file 2>&1
+    else
+        echo "The /mnt dir mount ready ,start check..." >> $log_file 2>&1
+        echo "Start...Check the existence of system important directories" >> $log_file 2>&1
+        check_sys_important_dir >>$log_file 2>&1
+        sleep 3s
+        echo "Check /mnt dir permission 777 and change it " >> $log_file 2>&1
+        check_rootdir_permission >>$log_file 2>&1
+        sleep 3s
+        echo "Check /mnt/etc/ssh dir permission 777 and change it" >> $log_file 2>&1
+        check_ssh_permission >>$log_file 2>&1
+        sleep 3s
+        # fstab is backed up before the two functions that edit it.
+        echo "backup fstab " >> $log_file 2>&1
+        backup_fstab >>$log_file 2>&1
+        sleep 3s
+        echo "replace fstab sysdisk " >> $log_file 2>&1
+        replace_sysdisk_by_diskname >>$log_file 2>&1
+        sleep 3s
+        echo "anon datadisk in fstab " >> $log_file 2>&1
+        annotation_datadisk >>$log_file 2>&1
+        sleep 3s
+        echo "replace uuid in grub " >> $log_file 2>&1
+        reset_grub_by_diskname >>$log_file 2>&1
+        sleep 3s
+        echo "check and disable selinux" >>$log_file 2>&1
+        disable_selinux >>$log_file 2>&1
+        sleep 3s
+        echo "check and disable sysctl var set" >>$log_file 2>&1
+        disable_sysctl >>$log_file 2>&1
+        sleep 3s
+        # From here on the calls are not redirected; the functions write to
+        # $log_file themselves.
+        echo "anon rc_local all line" >>$log_file 2>&1
+        disable_rc_local
+        sleep 3s
+        echo "check sshd_config" >>$log_file 2>&1
+        check_sshdconfig
+        sleep 3s
+        echo "check empty sshd dir and change dir permission to 711" >>$log_file 2>&1
+        check_empty_sshd
+        sleep 3s
+        echo "Check instance os_type" >>$log_file 2>&1
+        check_os_type
+        sleep 3s
+        echo "install dos2unix " >>$log_file 2>&1
+        install_dos2unix
+        sleep 3s
+        echo "Check passwd file" >>$log_file 2>&1
+        check_system_username
+        sleep 3s
+        echo "Check shadow file" >>$log_file 2>&1
+        check_root_pass_shadow
+fi
+# script end------------
+sleep 30s
+
+# upload logs to OSS
+# These three commands repeat the body of upload_log.
+cd /var/log/ecsanalyse;
+test -e ossutil64 || wget http://gosspublic.alicdn.com/ossutil/1.6.10/ossutil64 && chmod 755 /var/log/ecsanalyse/ossutil64;
+/var/log/ecsanalyse/ossutil64 -i your_ak -k your_sk -e oss-cn-beijing.aliyuncs.com cp -f $log_file oss://public-beijing/ooslogs/
+
diff --git a/shell/template.sh b/shell/template.sh index 69a2be6..938a19e
100644 --- a/shell/template.sh +++ b/shell/template.sh @@ -14,11 +14,23 @@ set -o nounset # Disallow expansion of unset variables set -o pipefail # Use last non-zero exit code in a pipeline +###################################################################################################### +# environment configuration +###################################################################################################### + + TAG="CMD" LOG_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/logs" LOG_FILE="$LOG_PATH/example_`date +"%Y%m%d"`.log" HIDE_LOG=true + + +###################################################################################################### +# function +###################################################################################################### + + function log() { [ ! -d "$LOG_PATH" ] && mkdir -p $LOG_PATH if [ $HIDE_LOG ]; then @@ -55,4 +67,9 @@ function main() { } +###################################################################################################### +# main +###################################################################################################### + + main "${@}"
diff --git a/shell/utils.sh b/shell/utils.sh
new file mode 100644
index 0000000..f4b6c5e
--- /dev/null
+++ b/shell/utils.sh
@@ -0,0 +1,102 @@
+
+
+# Download a URL to a local file with curl, making up to five attempts.
+# arguments: url, destination file
+# returns 0 as soon as one attempt succeeds, 1 after five failed attempts.
+util::download_file() {
+  local -r url=$1
+  local -r destination_file=$2
+  # Keep the loop counter out of the caller's scope.
+  local i
+
+  # Remove any stale copy before downloading. The redirection must be `2>`:
+  # with `2&>` bash treats the `2` as one more file name for rm to delete.
+  rm -f "${destination_file}" 2> /dev/null || true
+
+  for i in $(seq 5)
+  do
+    if ! curl -fsSL --retry 3 --keepalive-time 2 "${url}" -o "${destination_file}"; then
+      echo "Downloading ${url} failed. $((5-i)) retries left."
+      sleep 1
+    else
+      echo "Downloading ${url} succeed"
+      return 0
+    fi
+  done
+  return 1
+}
+
+
+# Example: util::wait_for_success 120 5 "kubectl get nodes|grep localhost"
+# arguments: wait time, sleep time, shell command
+# returns 0 as soon as the shell command exits successfully, 1 if it never
+# does before the wait time is used up.
+util::wait_for_success(){
+  # Re-run the command every sleep_time seconds until it exits with status 0
+  # or the wait_time budget is spent. The command runs through eval in the
+  # current shell, so its output is not suppressed.
+  local wait_time="$1"
+  local sleep_time="$2"
+  local cmd="$3"
+  while [ "$wait_time" -gt 0 ]; do
+    if eval "$cmd"; then
+      return 0
+    else
+      sleep "$sleep_time"
+      # Only the sleep is charged against the budget; with a sleep_time of 0
+      # the loop can end only when the command succeeds.
+      wait_time=$((wait_time-sleep_time))
+    fi
+  done
+  return 1
+}
+
+# Print the host OS as "darwin" or "linux", based on `uname -s`.
+# On any other OS a message is written to stderr and the shell exits with
+# status 1, so `$(util::host_os)` never captures the error text.
+util::host_os() {
+  local host_os
+  case "$(uname -s)" in
+    Darwin)
+      host_os=darwin
+      ;;
+    Linux)
+      host_os=linux
+      ;;
+    *)
+      echo "Unsupported host OS. Must be Linux or Mac OS X." >&2
+      exit 1
+      ;;
+  esac
+  echo "${host_os}"
+}
+
+# Print the host CPU architecture as one of amd64, arm64, arm, x86, s390x or
+# ppc64le, based on `uname -m`.
+# On any other architecture a message is written to stderr and the shell exits
+# with status 1.
+util::host_arch() {
+  local host_arch
+  # Order matters: the arm64 pattern must be tried before the broader arm*
+  # pattern, and i?86_64* before i?86*.
+  case "$(uname -m)" in
+    x86_64*)
+      host_arch=amd64
+      ;;
+    i?86_64*)
+      host_arch=amd64
+      ;;
+    amd64*)
+      host_arch=amd64
+      ;;
+    aarch64*)
+      host_arch=arm64
+      ;;
+    arm64*)
+      host_arch=arm64
+      ;;
+    arm*)
+      host_arch=arm
+      ;;
+    i?86*)
+      host_arch=x86
+      ;;
+    s390x*)
+      host_arch=s390x
+      ;;
+    ppc64le*)
+      host_arch=ppc64le
+      ;;
+    *)
+      echo "Unsupported host arch. Must be x86_64, 386, arm, arm64, s390x or ppc64le." >&2
+      exit 1
+      ;;
+  esac
+  echo "${host_arch}"
+}
+
+# Print the MD5 hex digest of the file given as $1.
+# Uses `md5 -q` when an md5 command exists, otherwise the first field of
+# md5sum's output.
+util::md5() {
+  # command -v is a shell builtin, so the probe works where `which` is absent.
+  if command -v md5 >/dev/null 2>&1; then
+    md5 -q "$1"
+  else
+    md5sum "$1" | awk '{ print $1 }'
+  fi
+}