add

5 years ago · 2007428466
9 changed files with 1014 additions and 80 deletions
--- a/bat/vmware-batch.bat
+++ b/bat/vmware-batch.bat
@ -9,17 +9,27 @@ IF EXIST "%PROGRAMFILES(X86)%\VMWare\VMWare Workstation\vmrun.exe" SET VMwarePat
 IF EXIST "%PROGRAMFILES%\VMware\VMware VIX\vmrun.exe" SET VMwarePath=%PROGRAMFILES%\VMware\VMware VIX
 IF EXIST "%PROGRAMFILES(X86)%\VMware\VMware VIX\vmrun.exe" SET VMRUN=%PROGRAMFILES(X86)%\VMware\VMware VIX
-::±äÁ¿ÉèÖÃ
+:: VMware安装地址
-::set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation"
+:: set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation"
 :: 虚拟机存放目录
 set VMpath="D:\Virtual Machines"
-set VMname=CentOS_7.4_x64_node
+:: 虚拟机名称
 set VMname=CentOS_7.8_x64_node
 :: 虚拟机快照名称
 set VMSnapshot=init
 :: 新建虚拟机数目
 set VMcount=5
-set VMowa="D:\vmware owa\CentOS_7.4_x64.ova"
+:: 虚拟机owa模板位置
 set VMowa="D:\vmware owa\CentOS_7.8_x64_base.ova"
 :: 模板系统用户名
 set VMuser=root
 :: 模板系统密码
 set VMpass=123456
-set VMipStart=10
+:: 虚拟机网络
 set VMnetwork=192.168.77
 :: 虚拟机ip开始地址
 set VMipStart=130
 :init
@ -75,6 +85,7 @@ ping /n 2 127.1>nul
 set /p a=^><nul
 )
 cls
 goto init
 :oneKey
@ -109,19 +120,22 @@ echo
 cd OVFTool
 ovftool --name=!VMname!%%a !VMowa! !VMpath!
 cd ..
 echo.
 echo 启动虚拟机: !VMname!%%a
 vmrun -T ws start !VMpath!\!VMname!%%a\!VMname!%%a.vmx
 )
 echo.
 echo 设置ip:
 for /l %%a in (1,1,%VMcount%) do (
 set name=!VMname!%%a
 set /a num=%VMipStart%+%%a-1
 set ip=!VMnetwork!.!num!
 echo !name!:!ip!
-vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/^IPADDR=.*/IPADDR=!ip!/g' /etc/sysconfig/network-scripts/ifcfg-ens33;/etc/init.d/network restart || sudo sed -i 's/^address.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
+vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo echo 'node!num!' > /etc/hostname; sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address .*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
 )
 echo.
 echo 创建快照:
 for /l %%a in (1,1,%VMcount%) do (
 set name=!VMname!%%a
@ -308,7 +322,7 @@ set name=!VMname!%%a
 set /a num=%VMipStart%+%%a-1
 set ip=!VMnetwork!.!num!
 echo !name!:!ip!
-vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/^IPADDR=.*/IPADDR=!ip!/g' /etc/sysconfig/network-scripts/ifcfg-ens33;/etc/init.d/network restart || sudo sed -i 's/^address.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
+vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address .*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
 )
 goto wait
--- a/python/supervisor_healthCheck.py
+++ b/python/supervisor_healthCheck.py
@ -4,10 +4,11 @@
 # @Time    : 2020-06-05
 # @Author  : lework
 # @Desc    : 针对supervisor的应用进行健康检查
-# @Version : 1.5
+# @Version : 1.6
 import os
 import re
 import sys
 import time
 import json
@ -89,62 +90,39 @@ def get_proc_cpu(pid):
        return None
    return cpu_utilization
-
def get_proc_mem(pid, type="rss"):
    """
    Report the memory usage of a process by summing fields of /proc/<pid>/smaps.

    :param pid: id of the process to inspect
    :param type: which figure to report: "rss" (sum of Rss lines),
                 "pss" (sum of Pss lines) or "uss" (sum of Private_* lines)
    :return: memory usage in MB, or None when the type is unknown or the
             smaps file is missing or cannot be read
    """
    # Bytes patterns: the file is read in binary mode so the same code works
    # on Python 2 and Python 3 (a bytes pattern cannot search a text string).
    patterns = {
        "rss": br"\nRss:\s+(\d+)",
        "pss": br"\nPss:\s+(\d+)",
        "uss": br"\nPrivate.*:\s+(\d+)",
    }
    if type not in patterns:
        # Previously an unknown type fell through every branch and crashed on
        # an unbound local; report it like the other failure paths instead.
        print("[Error] unknown memory type %s" % type)
        return None

    smaps_file = "/proc/%s/smaps" % pid
    if not os.path.exists(smaps_file):
        print("[Error] not found %s" % smaps_file)
        return None

    try:
        with open(smaps_file, "rb") as f:
            smaps_data = f.read()
    except Exception as e:
        print("[Error] %s" % e)
        return None

    data = sum(map(int, re.findall(patterns[type], smaps_data)))
    data = data / 1024  # smaps values are in kB; return MB
    return data
 class WorkerThread(threading.Thread):
@ -188,6 +166,7 @@ class HealthCheck(object):
        if 'config' in config:
            self.mail_config = config['config'].get('mail')
            self.wechat_config = config['config'].get('wechat')
            self.dingding_config = config['config'].get('dingding')
            self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url)
            self.supervisord_user = config['config'].get('supervisordUser', None)
            self.supervisord_pass = config['config'].get('supervisordPass', None)
@ -196,7 +175,7 @@ class HealthCheck(object):
        self.program_config = config
        # 只保留通知action
-        self.notice_action = ['email', 'wechat']
+        self.notice_action = ['email', 'wechat', 'dingding']
        self.periodSeconds = 5
        self.failureThreshold = 3
@ -204,8 +183,8 @@ class HealthCheck(object):
        self.initialDelaySeconds = 1
        self.sendResolved = False
-        self.max_rss = 1024
+        self.mem_type = 'rss'
-        self.cumulative = False
+        self.max_mem = 1024
        self.max_cpu = 90
    def get_supervisord_conn(self):
@ -461,11 +440,11 @@ class HealthCheck(object):
        :return: dict
        """
        program = config.get('program')
-        max_rss = config.get('maxRss', self.max_rss)
+        max_mem = config.get('maxMem', self.max_mem)
-        cumulative = config.get('cumulative', self.cumulative)
+        mem_type = config.get('memType', self.mem_type)
        pid_get = config.get('pidGet', 'supervisor')
        pid_file = config.get('pidFile', )
-        check_info = 'max_rss:%sMB cumulative:%s' % (max_rss, cumulative)
+        check_info = 'max_mem:%sMB mem_type:%s' % (max_mem, mem_type)
        pid, err = self.get_pid(program, pid_get, pid_file)
        if pid == 0:
@ -473,13 +452,13 @@ class HealthCheck(object):
            return {'status': 'failure',
                    'msg': '[mem_check] program not starting, message: %s.' % err,
                    'info': check_info}
-        now_rss = get_proc_rss(pid, cumulative)
+        now_mem = get_proc_mem(pid, mem_type)
-        check_info = '%s now_rss:%sMB pid:%s' % (check_info, now_rss, pid)
+        check_info = '%s now_mem:%sMB pid:%s' % (check_info, now_mem, pid)
-        if now_rss >= int(max_rss):
+        if now_mem >= int(max_mem):
-            return {'status': 'failure', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss),
+            return {'status': 'failure', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem),
                    'info': check_info}
-        return {'status': 'success', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss),
+        return {'status': 'success', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem),
                'info': check_info}
    def cpu_check(self, config):
@ -544,6 +523,8 @@ class HealthCheck(object):
            self.action_email(program, action_type, msg, check_status)
        if 'wechat' in action_list and self.wechat_config:
            self.action_wechat(program, action_type, msg, check_status)
        if 'dingding' in action_list and self.dingding_config:
            self.action_dingding(program, action_type, msg, check_status)
    def action_supervisor_restart(self, program):
        """
@ -612,7 +593,7 @@ class HealthCheck(object):
        result = 'success'
        if int(pid) < 3:
-            return 'Failed to kill %s, pid: %s '% (program, pid)
+            return 'Failed to kill %s, pid: %s ' % (program, pid)
        cmd = "kill -9 %s" % pid
        exitcode, stdout, stderr = shell(cmd)
@ -785,6 +766,51 @@ class HealthCheck(object):
        self.log(program, '[Action: wechat] send success')
        return True
    def action_dingding(self, program, action_type, msg, check_status):
        """
        Send a markdown notification through a DingTalk robot webhook.

        :param program: program name, used in the message title/body and for logging
        :param action_type: not used by this notifier; accepted so the signature
                            matches the other notification actions
        :param msg: detail text placed in the message body
        :param check_status: 'success' selects the success title, anything else
                             the failure title
        :return: True when the request succeeded and the API answered errcode 0,
                 False otherwise
        """
        curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        hostname = platform.node().split('.')[0]
        system_platform = platform.platform()

        host = "oapi.dingtalk.com"
        access_token = self.dingding_config.get('access_token')
        # The placeholder name must match the keyword passed to format(), and
        # the query string takes a single '='.
        send_url = '/robot/send?access_token={token}'.format(token=access_token)

        headers = {
            'Content-Type': 'application/json'
        }

        if check_status == 'success':
            title = "[%s] Health check successful" % program
        else:
            title = "[%s] Health check failed" % program

        send_data = {"msgtype": "markdown",
                     "markdown": {
                         "title": title,
                         "text": "#### 详情信息: \n> Program：%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg：%s" % (
                         program, curr_dt, hostname, system_platform, msg)
                     }
                     }

        # Bound before the try block so the finally clause is safe even when
        # creating the connection itself raises.
        httpClient = None
        try:
            httpClient = httplib.HTTPSConnection(host, timeout=10)
            httpClient.request("POST", send_url, json.dumps(send_data), headers=headers)
            response = httpClient.getresponse()
            result = json.loads(response.read())
            if result['errcode'] != 0:
                self.log(program, '[Action: dingding] send faild %s' % result)
                return False
        except Exception as e:
            self.log(program, '[Action: dingding] send error %s' % e)
            return False
        finally:
            if httpClient:
                httpClient.close()

        self.log(program, '[Action: dingding] send success')
        return True
    def start(self):
        """
        启动检测
@ -807,7 +833,7 @@ class HealthCheck(object):
        while 1:
            time.sleep(0.1)
-            for i,t in enumerate(threads):
+            for i, t in enumerate(threads):
                if not t.isAlive():
                    thread_name = t.getName()
                    self.log('ERROR', 'Exception in %s (catch by main): %s' % (thread_name, t.get_exception()))
@ -825,6 +851,7 @@ if __name__ == '__main__':
        print("Exit check!")
        sys.exit(0)
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)
@ -850,12 +877,14 @@ config:                                          # 脚本配置名称,请勿更
 #    touser: 
 #    toparty: 
 #    totag: 
 #  dingding:                                     # 钉钉通知配置
 #    access_token:
 # 内存方式监控
 cat1:                     # supervisor中配置的program名称
  type: mem               # 检查类型: http,tcp,mem,cpu  默认: http
-  maxRss: 1024            # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024
+  maxMem: 1024            # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024
-  cumulative: True        # 是否统计子进程的内存, 默认: False
+  memType: rss            # 内存使用分类：rss, pss, uss 默认：rss
  pidGet: supervisor      # 获取pid的方式: supervisor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervisor
  pidFile: /var/run/t.pid # 指定pid文件的路径, 只在pidGet为file的时候有用
  periodSeconds: 10       # 检查的频率(以秒为单位), 默认: 5
--- a/shell/get_proc_mem.sh
+++ b/shell/get_proc_mem.sh
@ -0,0 +1,89 @@
 #!/usr/bin/env bash
 # Target process id (first positional argument).
 pid=$1
 # Number of sampling iterations run before the final report; defaults to 0.
 retries="${2:-0}"
 # Seconds to sleep between samples; defaults to 1.
 wait="${3:-1}"
 # Holds the raw contents of /proc/<pid>/smaps once get_meminfo has run.
 pid_smaps=""
 # Sum the values of every smaps line whose first field is exactly "$1:".
 # Matching the whole field keeps "Swap" from also counting "SwapPss" and
 # "Pss" from also counting "Pss_Dirty". Prints 0 when nothing matches.
 function _sum_smaps_field() {
  printf "%s" "${pid_smaps}" \
    | awk -v key="$1:" '$1 == key {sum += $2} END {print sum + 0}'
 }

 #######################################
 # Read system and per-process memory counters.
 # Globals:
 #   pid       (read)    process to inspect
 #   PROC_ROOT (read)    optional override of the /proc mount point
 #   pid_smaps (written) raw smaps contents
 #   mem_total mem_free mem_available (written) values from meminfo
 #   size rss pss shared_clean shared_dirty private_clean private_dirty
 #   swap swap_pss (written) per-field sums over all smaps mappings
 # Outputs: an error and usage text on stderr when the smaps file is missing
 # Returns: exits the script with status 1 when the smaps file is missing
 #######################################
 function get_meminfo() {
  local proc_root="${PROC_ROOT:-/proc}"
  local smaps_file="${proc_root}/${pid}/smaps"

  if [ ! -f "${smaps_file}" ]; then
    echo "[Error] not found $pid smaps file." >&2
    echo "Usage: bash $0 Pid Retries Wait, like: bash $0 1234 100 5" >&2
    exit 1
  fi
  pid_smaps=$(cat "${smaps_file}")

  mem_info=$(cat "${proc_root}/meminfo")
  mem_total=$(printf "%s" "${mem_info}" | awk '/^MemTotal:/  {print $2}')
  mem_free=$(printf "%s" "${mem_info}" | awk '/^MemFree:/  {print $2}')
  mem_available=$(printf "%s" "${mem_info}" | awk '/^MemAvailable:/  {print $2}')

  size=$(_sum_smaps_field Size)
  rss=$(_sum_smaps_field Rss)
  pss=$(_sum_smaps_field Pss)
  shared_clean=$(_sum_smaps_field Shared_Clean)
  shared_dirty=$(_sum_smaps_field Shared_Dirty)
  private_clean=$(_sum_smaps_field Private_Clean)
  private_dirty=$(_sum_smaps_field Private_Dirty)
  swap=$(_sum_smaps_field Swap)
  swap_pss=$(_sum_smaps_field SwapPss)
 }
 # Sampling phase: print one summary line per iteration, `retries` times,
 # sleeping `wait` seconds in between. Bare names inside $(( )) evaluate to 0
 # when a counter is empty, so a missing field cannot break the arithmetic.
 count=0
 while (( count < retries )); do
  get_meminfo
  echo "Date: $(date +'%Y-%m-%d %T') MemTotal: $((mem_total/1024))MB MemFree: $((mem_free/1024))MB MemAvailable: $((mem_available/1024))MB RSS: $((rss/1024))MB PSS: $((pss/1024))MB USS: $(( (private_clean + private_dirty) /1024 ))MB"
  sleep "$wait"
  count=$((count + 1))
 done
 # Final report: refresh the counters, then print the field glossary followed
 # by the process details and raw values.
 get_meminfo
 # The User line asks ps for the owner of the pid; `id` does not read stdin,
 # so feeding it the loginuid file only ever printed the invoking user.
 cat << EOF
 # OS meminfo
 MemTotal：内存总数
 MemFree：空闲内存数
 MemAvailable：可用内存数,包括cache/buffer、slab
 # Process smaps
 Size：表示该映射区域在虚拟内存空间中的大小。
 Rss： 表示该映射区域当前在物理内存中占用了多少空间
      Rss=Shared_Clean+Shared_Dirty+Private_Clean+Private_Dirty
 Pss： 该虚拟内存区域平摊计算后使用的物理内存大小(有些内存会和其他进程共享，例如mmap进来的)
      实际上包含下面private_clean+private_dirty，和按比例均分的shared_clean、shared_dirty。
 Uss:  Unique Set Size 进程独自占用的物理内存（不包含共享库占用的内存）
      USS=Private_Clean+Private_Dirty
 Shared_Clean：  和其他进程共享的未被改写的page的大小
 Shared_Dirty：  和其他进程共享的被改写的page的大小
 Private_Clean： 未被改写的私有页面的大小。
 Private_Dirty： 已被改写的私有页面的大小。
 Swap：   存在于交换分区的数据大小(如果物理内存有限，可能存在一部分在主存一部分在交换分区)
 SwapPss: 计算逻辑就跟pss一样，只不过针对的是交换分区的内存。
 Pid: ${pid}
 Cmd: $(tr -d '\0' < "/proc/${pid}/cmdline" | cut -c1-80)
 User: $(ps -o user= -p "${pid}")
 Threads: $(awk '/Threads:/ {print $2}' "/proc/${pid}/status")
 File: /proc/${pid}/smaps
 # Os meminfo
 MemTotal:              ${mem_total} KB
 MemFree:               ${mem_free} KB
 MemAvailable:          ${mem_available} KB
 # Process smaps
 Size:                  ${size} KB
 RSS:                   ${rss} kB
 PSS:                   ${pss} kB
 Shared_Clean:          ${shared_clean} kB
 Shared_Dirty:          ${shared_dirty} kB
 Private_Clean:         ${private_clean} kB
 Private_Dirty:         ${private_dirty} kB
 Swap:                  ${swap} kB
 SwapPss:               ${swap_pss} kB
 USS:                   ${private_clean} + ${private_dirty} = $(( private_clean + private_dirty )) kB
 EOF
--- a/shell/keystore.sh
+++ b/shell/keystore.sh
@ -0,0 +1,75 @@
 #!/bin/bash
 # Generate a self-signed CA plus CA-signed server and client JKS keystores
 # and matching truststores (aliases kafka-server / kafka-client).
 #
 # Environment overrides:
 #   BASE_DIR  directory under which "certificates/" is created (default: cwd)
 #   PASSWORD  password used for every store and key (default: test1234)

 # Each step consumes the output of the previous one, so stop at the first
 # failure, on unset variables, and on failed pipeline stages.
 set -euo pipefail

 # Environment settings
 BASE_DIR="${BASE_DIR:-$(pwd)}" # change this, or export BASE_DIR, to relocate the output
 CERT_OUTPUT_PATH="$BASE_DIR/certificates"
 PASSWORD="${PASSWORD:-test1234}"
 KEY_STORE="$CERT_OUTPUT_PATH/server.keystore.jks"
 TRUST_STORE="$CERT_OUTPUT_PATH/server.truststore.jks"
 CLIENT_KEY_STORE="$CERT_OUTPUT_PATH/client.keystore.jks"
 CLIENT_TRUST_STORE="$CERT_OUTPUT_PATH/client.truststore.jks"
 KEY_PASSWORD=$PASSWORD
 STORE_PASSWORD=$PASSWORD
 TRUST_KEY_PASSWORD=$PASSWORD
 TRUST_STORE_PASSWORD=$PASSWORD
 CERT_AUTH_FILE="$CERT_OUTPUT_PATH/ca-cert"
 CA_KEY_FILE="$CERT_OUTPUT_PATH/ca-key"
 DAYS_VALID=3650
 DNAME="CN=Test, OU=YourDept, O=YourCompany, L=Shanghai, ST=Shanghai, C=CN"
 # openssl -subj separates every RDN with '/', including CN.
 SUBJ="/C=CN/ST=Shanghai/L=Shanghai/O=YourCompany/OU=YourDept/CN=Test"
 mkdir -p "$CERT_OUTPUT_PATH"
 echo "1. 产生 key 和证书......"
 keytool -keystore "$KEY_STORE" -alias kafka-server -validity "$DAYS_VALID" -genkey -keyalg RSA \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "$DNAME"
 keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -validity "$DAYS_VALID" -genkey -keyalg RSA \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "$DNAME"
 echo "2. 创建 CA......"
 openssl req -new -x509 -keyout "$CA_KEY_FILE" -out "$CERT_AUTH_FILE" -days "$DAYS_VALID" \
 -passin pass:"$PASSWORD" -passout pass:"$PASSWORD" \
 -subj "$SUBJ"
 echo "3. 添加 CA 文件到 broker truststore......"
 keytool -keystore "$TRUST_STORE" -alias CARoot \
 -importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt
 echo "4. 添加 CA 文件到 client truststore......"
 keytool -keystore "$CLIENT_TRUST_STORE" -alias CARoot \
 -importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt
 echo "5. 从 keystore 中导出集群证书......"
 keytool -keystore "$KEY_STORE" -alias kafka-server -certreq -file "$CERT_OUTPUT_PATH/server-cert-file" \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
 keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -certreq -file "$CERT_OUTPUT_PATH/client-cert-file" \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
 echo "6. 使用 CA 签发证书......"
 openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey "$CA_KEY_FILE" -in "$CERT_OUTPUT_PATH/server-cert-file" \
 -out "$CERT_OUTPUT_PATH/server-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD"
 openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey "$CA_KEY_FILE" -in "$CERT_OUTPUT_PATH/client-cert-file" \
 -out "$CERT_OUTPUT_PATH/client-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD"
 echo "7. 导入 CA 文件到 keystore......"
 keytool -keystore "$KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \
 -keypass "$KEY_PASSWORD" -noprompt
 keytool -keystore "$CLIENT_KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \
 -keypass "$KEY_PASSWORD" -noprompt
 echo "8. 导入已签发证书到 keystore......"
 keytool -keystore "$KEY_STORE" -alias kafka-server -import -file "$CERT_OUTPUT_PATH/server-cert-signed" \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
 keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -import -file "$CERT_OUTPUT_PATH/client-cert-signed" \
 -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
 echo "9. 删除临时文件......"
 # -f: a missing intermediate file is not an error at cleanup time.
 rm -f -- "$CERT_OUTPUT_PATH/ca-cert.srl"
 rm -f -- "$CERT_OUTPUT_PATH/server-cert-signed"
 rm -f -- "$CERT_OUTPUT_PATH/client-cert-signed"
 rm -f -- "$CERT_OUTPUT_PATH/server-cert-file"
 rm -f -- "$CERT_OUTPUT_PATH/client-cert-file"
--- a/shell/kube-logging.sh
+++ b/shell/kube-logging.sh
@ -0,0 +1,171 @@
 #!/usr/bin/env bash
 # Copyright 2014 The Kubernetes Authors.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # Controls verbosity of the script output and logging.
 KUBE_VERBOSE="${KUBE_VERBOSE:-2}"
 # Handler for when we exit automatically on an error.
 # Borrowed from https://gist.github.com/ahendrix/7030300
 # ERR-trap handler: prints the call tree and the failing command, then exits
 # through kube::log::error_exit.
 # Args:
 #   $1 Exit code forwarded to kube::log::error_exit (defaults to 1)
 kube::log::errexit() {
  # Must be the first statement: any later command overwrites PIPESTATUS.
  local err="${PIPESTATUS[*]}"
  # If the shell we are in doesn't have errexit set (common in subshells) then
  # don't dump stacks.
  set +o | grep -qe "-o errexit" || return
  # Turn off tracing so the report below is not interleaved with xtrace output.
  set +o xtrace
  # NOTE(review): `code` is assigned but never read; the call at the end
  # re-expands "${1:-1}" directly.
  local code="${1:-1}"
  # Print the call tree, skipping this handler (index 0) and the outermost
  # frame. Only done when there is at least one frame in between.
  if [ ${#FUNCNAME[@]} -gt 2 ]
  then
    kube::log::error "Call tree:"
    # NOTE(review): `i` is not declared local here.
    for ((i=1;i<${#FUNCNAME[@]}-1;i++))
    do
      kube::log::error " ${i}: ${BASH_SOURCE[${i}+1]}:${BASH_LINENO[${i}]} ${FUNCNAME[${i}]}(...)"
    done
  fi
  # Third argument (1) asks error_exit to skip this handler's own frame.
  kube::log::error_exit "Error in ${BASH_SOURCE[1]}:${BASH_LINENO[0]}. '${BASH_COMMAND}' exited with status ${err}" "${1:-1}" 1
 }
 # Install kube::log::errexit as the ERR handler for the current shell.
 kube::log::install_errexit() {
  # errtrace makes functions, command substitutions and subshells inherit
  # the ERR trap instead of dropping it.
  set -o errtrace

  # Fires whenever a command exits non-zero; a more verbose take on
  # `set -o errexit`.
  trap 'kube::log::errexit' ERR
 }
 # Print out the stack trace
 #
 # Args:
 #   $1 The number of stack frames to skip when printing.
 kube::log::stack() {
  # One extra frame is always skipped: this function itself.
  local skip=$(( ${1:-0} + 1 ))
  local depth=${#FUNCNAME[@]}

  # Nothing left to show once the skipped frames are removed.
  [[ ${depth} -gt ${skip} ]] || return 0

  local frame=${skip}
  local position=1
  {
    echo "Call stack:"
    while [[ ${frame} -lt ${depth} ]]; do
      # BASH_LINENO[n-1] is the line in frame n from which frame n-1 was called.
      echo "  ${position}: ${BASH_SOURCE[${frame}]}:${BASH_LINENO[$((frame - 1))]} ${FUNCNAME[${frame}]}(...)"
      frame=$((frame + 1))
      position=$((position + 1))
    done
  } >&2
 }
 # Log an error and exit.
 # Args:
 #   $1 Message to log with the error
 #   $2 The error code to return
 #   $3 The number of stack frames to skip when printing.
 kube::log::error_exit() {
  local message="${1:-}"
  local code="${2:-1}"
  # Skip the caller-requested frames plus this function's own frame.
  local stack_skip=$(( ${3:-0} + 1 ))

  # The diagnostic report is only written at verbosity 4 and above; the exit
  # below happens regardless.
  [[ ${KUBE_VERBOSE} -ge 4 ]] && {
    echo "!!! Error in ${BASH_SOURCE[${stack_skip}]}:${BASH_LINENO[$((stack_skip - 1))]}" >&2
    if [[ -n ${message} ]]; then
      echo "  ${message}" >&2
    fi

    kube::log::stack "${stack_skip}"

    echo "Exiting with status ${code}" >&2
  }

  exit "${code}"
 }
 # Log an error but keep going.  Don't dump the stack or exit.
 # Args:
 #   $1   Headline, printed after "!!!" and a timestamp
 #   $2.. Optional detail lines, printed indented below the headline
 # Outputs: everything goes to stderr
 kube::log::error() {
  # Keep helper variables local so logging never clobbers a caller's
  # `timestamp` or `message`.
  local timestamp message
  timestamp=$(date +"[%m%d %H:%M:%S]")
  echo "!!! ${timestamp} ${1-}" >&2
  # A bare `shift` fails when there are no arguments, which would abort
  # callers running under errexit or an ERR trap.
  if (( $# > 0 )); then
    shift
  fi
  for message; do
    echo "    ${message}" >&2
  done
 }
 # Print an usage message to stderr.  The arguments are printed directly.
 # Args:
 #   $@ Lines of the usage text, written verbatim one per line
 # Outputs: a blank line, the given lines, and a closing blank line on stderr
 kube::log::usage() {
  local message
  {
    echo
    for message in "$@"; do
      echo "${message}"
    done
    echo
  } >&2
 }
 # Read usage text from stdin, one message per line, and hand all lines to
 # kube::log::usage.
 kube::log::usage_from_stdin() {
  local messages=()
  # Local so the read loop cannot overwrite a caller's `line`.
  local line
  # The second test keeps a final line that lacks a trailing newline.
  while read -r line || [[ -n "${line}" ]]; do
    messages+=("${line}")
  done

  kube::log::usage "${messages[@]}"
 }
 # Print out some info that isn't a top level status line
 # Args:
 #   $@ Lines to print on stdout
 # Globals:
 #   KUBE_VERBOSE (read) current verbosity
 #   V            (read) minimum verbosity required for this message (default 0)
 kube::log::info() {
  local V="${V:-0}"
  # Compare numerically: inside [[ ]] the < operator compares strings, so a
  # verbosity of 10 would sort below a threshold of 2.
  if (( KUBE_VERBOSE < V )); then
    return
  fi

  local message
  for message; do
    echo "${message}"
  done
 }
 # Just like kube::log::info, but no \n, so you can make a progress bar
 # Args:
 #   $@ Fragments written back to back on stdout with no newline added;
 #      backslash escapes are interpreted (echo -e)
 kube::log::progress() {
  # Local so the loop cannot overwrite a caller's `message`.
  local message
  for message; do
    echo -e -n "${message}"
  done
 }
 # Read info text from stdin, one message per line, and hand all lines to
 # kube::log::info.
 kube::log::info_from_stdin() {
  local messages=()
  # Local so the read loop cannot overwrite a caller's `line`.
  local line
  # The second test keeps a final line that lacks a trailing newline.
  while read -r line || [[ -n "${line}" ]]; do
    messages+=("${line}")
  done

  kube::log::info "${messages[@]}"
 }
 # Print a status line.  Formatted to show up in a stream of output.
 # Args:
 #   $1   Headline, printed after "+++" and a timestamp
 #   $2.. Optional detail lines, printed indented below the headline
 # Globals:
 #   KUBE_VERBOSE (read) current verbosity
 #   V            (read) minimum verbosity required for this message (default 0)
 kube::log::status() {
  local V="${V:-0}"
  # Compare numerically: inside [[ ]] the < operator compares strings, so a
  # verbosity of 10 would sort below a threshold of 2.
  if (( KUBE_VERBOSE < V )); then
    return
  fi

  # Keep helper variables local so logging never clobbers a caller's
  # `timestamp` or `message`.
  local timestamp message
  timestamp=$(date +"[%m%d %H:%M:%S]")
  echo "+++ ${timestamp} ${1-}"
  # A bare `shift` fails when there are no arguments.
  if (( $# > 0 )); then
    shift
  fi
  for message; do
    echo "    ${message}"
  done
 }
--- a/shell/linux采集/caiji.sh
+++ b/shell/linux采集/caiji.sh
@ -0,0 +1,83 @@
 #!/bin/bash
 # create log file folder
 # Collect a diagnostic snapshot of the host into a timestamped log file under
 # /var/log/ecsanalyse. Sections start with "###", per-distro variants with "***".
 [ -e /var/log/ecsanalyse ] || mkdir /var/log/ecsanalyse
 datetime=$(date +%Y%m%d-%H-%M-%S-%N)
 log_filename=ecs_analyse_${datetime}.log
 log_file=/var/log/ecsanalyse/$log_filename

 # Run one simple command, appending its stdout and stderr to the log.
 emit() {
  "$@" >> "$log_file" 2>&1
 }

 # script start------------
 emit echo "##*problem_total_analyse"

 emit echo "###dos-ff"
 emit file /etc/passwd
 emit file /etc/shadow
 emit file /etc/pam.d/*

 emit echo "###limits"
 cat /etc/security/limits.conf | emit grep -Ev "^$|[#;]"

 emit echo "###virtio-net-multiqueue"
 # Every interface except loopback: its name, then its "Combined" queue lines.
 for i in $(ip link | grep -E "^[0-9]+: .*:" -o | cut -d ":" -f 2 | grep -v lo); do
  emit echo $i
  ethtool -l $i 2>/dev/null | emit grep Combined
 done

 emit echo "###passwd"
 emit cat /etc/passwd

 emit echo "###cpu-top-5"
 top -b -n 1 | emit grep "%Cpu(s):"
 ps -eT -o%cpu,pid,tid,ppid,comm | grep -v CPU | sort -n -r | emit head -5

 emit echo "###ssh-perm"
 emit echo "***centos"
 emit ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /var/empty/* /etc/securetty* /etc/security/* /etc/ssh/*
 # Ubuntu and Debian list the same set of paths.
 for distro in ubuntu debian; do
  emit echo "***${distro}"
  emit ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /etc/securetty* /etc/security/* /etc/ssh/*
 done

 emit echo "###blkid"
 emit blkid

 emit echo "###osinfo"
 if [ -f "/etc/os-release" ]; then
  cat /etc/os-release | emit egrep "^NAME=|^VERSION="
 else
  emit echo "no os-release"
  emit echo "no os-release"
 fi
 if [ -f "/etc/redhat-release" ]; then
  emit echo "redhat-release:" $(cat /etc/redhat-release)
 else
  emit echo "no redhat-release"
 fi
 emit echo "uname: " $(uname -a)
 emit echo "uname short\: " $(uname -r)

 emit echo "###softlink"
 ls -l / | emit grep "\->"

 emit echo "###iptables"
 # CentOS 5 and 6 are both queried through the iptables service.
 for release in centos-5 centos-6; do
  emit echo "***${release}"
  emit service iptables status
 done
 emit echo "***centos-7"
 emit firewall-cmd --state
 emit echo "***ubuntu"
 emit ufw status
 emit echo "***default"
 emit iptables -L

 emit echo "###sysctl"
 cat /etc/sysctl.conf | emit grep nr_hugepages
 # "\c" suppresses echo's newline so the value lands on the same line as the
 # key; the /proc/sys path is the key with dots turned into slashes.
 for key in net.ipv4.tcp_tw_recycle net.ipv4.tcp_timestamps fs.nr_open net.ipv4.tcp_sack; do
  emit echo -e "${key}=\c" && emit cat "/proc/sys/${key//.//}"
 done

 emit echo "###fstab"
 cat /etc/fstab | emit grep -Ev "^$|[#;]"

 emit echo "###dmesg"
 emit cat /proc/uptime
 dmesg | grep "invoked oom-killer" | emit tail -n 1

 emit echo "###port-usage"
 netstat -tapn | grep LISTEN | emit grep -E 'sshd'

 emit echo "###selinux"
 emit echo "***default"
 emit getenforce
 # For Ubuntu and Debian only the exit status of the service query is logged.
 for distro in ubuntu debian; do
  emit echo "***${distro}"
  service selinux status > /dev/null; emit echo $?
 done

 emit echo "###meminfo"
 cat /proc/meminfo | emit grep Hugepagesize
 cat /proc/meminfo | emit grep MemTotal
 # script end------------
--- a/shell/linux采集/oos_noak.sh
+++ b/shell/linux采集/oos_noak.sh
@ -0,0 +1,354 @@
 #!/bin/bash
 # Repair helper: locates the disk that is not vda, checks and mounts it on /mnt,
 # rewrites selected configuration files below /mnt and uploads the run log.
 # Create the log directory if it is missing.
 test -e /var/log/ecsanalyse || mkdir -p /var/log/ecsanalyse;
 # Timestamp with nanoseconds; used in the log file name and as the suffix of
 # the fstab/grub backup copies made further down.
 datetime=$(date +%Y%m%d-%H-%M-%S-%N)
 log_filename=ecs_analyse_${datetime}.log
 log_file=/var/log/ecsanalyse/$log_filename
 # Region id read from the metadata endpoint.
 # NOTE(review): regionId is not referenced anywhere else in the visible script.
 regionId=$(curl -s http://100.100.100.200/latest/meta-data/region-id)
 # Mount point for the disk under repair.
 mount_dir=/mnt
 # Device name(s) taken from every blkid line that does not mention vda.
 # NOTE(review): holds several lines if more than one such device exists.
 disk_vdb=$(blkid |grep -v vda | awk -F : '{print $1}')
 # Files on the mounted disk that the functions below inspect or rewrite.
 fstab=/mnt/etc/fstab
 grub_f=/mnt/boot/grub/grub.cfg
 grub2_f=/mnt/boot/grub2/grub.cfg
 selinux_config_file=/mnt/etc/selinux/config
 sysctl_config_file=/mnt/etc/sysctl.conf
 rc_local_file=/mnt/etc/rc.local
 sshdconfig_file=/mnt/etc/ssh/sshd_config
 passfile=/mnt/etc/passwd
 passbakfile="/mnt/etc/passwd-"
 shadowfile=/mnt/etc/shadow
 shadowbakfile="/mnt/etc/shadow-"
 # Accounts that check_system_username expects to find in the passwd file.
 system_user=(root daemon nobody dbus polkitd sshd messagebus)
 # Paths that check_sys_important_dir expects to exist on the mounted disk.
 # NOTE(review): /mnt/var is listed twice; the second entry may have been meant
 # to be a different directory.
 system_dir=(/mnt/bin /mnt/sbin /mnt/usr/bin /mnt/usr/sbin /mnt/lib /mnt/lib64 /mnt/usr/lib /mnt/usr/lib64 /mnt/etc /mnt/boot /mnt/var /mnt/var)
 # Give root on the machine running this script a random password and record
 # it in the log.
 # NOTE(review): the password is written in clear text to a log file that is
 # uploaded at the end of the run.
 newpw=$(openssl rand -base64 10)
 echo root:${newpw}|chpasswd
 echo "The new instance passwd is ${newpw}" >>$log_file 2>&1
 ###define upload log function
 # Upload the current log file to the OSS bucket with ossutil64.
 # Globals: log_file (read).
 # Returns: exit status of the ossutil64 upload command.
 upload_log() {
     cd /var/log/ecsanalyse
     # Download the client only when it is absent; the mode is (re)applied both
     # when the binary was already present and after a successful download.
     if [ -e ossutil64 ] || wget http://gosspublic.alicdn.com/ossutil/1.6.10/ossutil64; then
         chmod 755 /var/log/ecsanalyse/ossutil64
     fi
     /var/log/ecsanalyse/ossutil64 -i your_ak -k your_sk -e oss-cn-beijing.aliyuncs.com cp -f $log_file oss://public-beijing/ooslogs/
 }
 ###yuanyuan checkdisk and mount disk
 # Abort the run when no repair disk was detected.
 # Globals: disk_vdb (read), log_file (read).
 # When disk_vdb is empty: logs the problem, uploads the log and exits the script.
 check_vdb_is_exist() {
     # A device was found, nothing to do.
     [ -n "$disk_vdb" ] && return 0
     echo "Disk can not find in this system ,exit " >>$log_file 2>&1
     upload_log
     exit
 }
 # Mount the repair disk on the mount directory.
 # Globals: mount_dir, disk_vdb, log_file (all read).
 # Returns: exit status of mount.
 mount_disk() {
     # Make sure the mount point exists.
     [ -e $mount_dir ] || mkdir  -p $mount_dir
     # Release whatever is currently mounted there before mounting again.
     mountpoint -q $mount_dir && umount $mount_dir
     mount $disk_vdb  $mount_dir  >>$log_file 2>&1
 }
 # Log space and inode usage of the mounted repair disk.
 # Globals: mount_dir, log_file (read); disk_usage, disk_inode_usage (written).
 # The value is the fifth column of the last line printed by df.
 get_disk_usage() {
     disk_usage=$(df -h $mount_dir | tail -n 1 | awk '{print $5}')
     disk_inode_usage=$(df -i $mount_dir | tail -n 1 | awk '{print $5}')
     {
         echo "disk space usage: $disk_usage"
         echo "disk inode usage: $disk_inode_usage"
     } >>$log_file 2>&1
 }
 # Check and repair the filesystem on the repair disk before it is mounted.
 # Globals: mount_dir, disk_vdb, log_file (read); fs_type (written).
 # ext* filesystems get fsck unless tune2fs reports the state exactly "clean";
 # every other type is handed to xfs_repair, as before.
 # Returns: exit status of the last command that ran.
 repair_fs() {
     local fs_state
     if grep -qs "$mount_dir" /proc/mounts; then
         echo "Check MNT dir is mounted,umount it" >>$log_file 2>&1
         umount  $mount_dir
     fi
     # Ask blkid for the TYPE tag only. Matching "ext" against the whole blkid
     # line also fired on labels or paths that merely contain "ext".
     fs_type=$(blkid -s TYPE -o value $disk_vdb)
     case "$fs_type" in
         ext*)
             # Compare the whole state value: "not clean" and "clean with errors"
             # both contain the word "clean" but still need fsck.
             fs_state=$(tune2fs -l $disk_vdb | awk -F: '/^Filesystem state/ { sub(/^[ \t]+/, "", $2); print $2 }')
             if [ "$fs_state" = "clean" ]; then
                 echo "Extfs is clean ,dont need fsck" >>$log_file 2>&1
             else
                 echo "This line means that the file system need fsck，its ext" >>$log_file 2>&1
                 fsck  -y $disk_vdb >>$log_file 2>&1
             fi
             ;;
         *)
             echo "This line means that the file system is xfs " >>$log_file 2>&1
             xfs_repair $disk_vdb >>$log_file 2>&1
             ;;
     esac
 }
 # Prepare the repair disk: verify it exists, repair its filesystem, mount it.
 # Returns: non-zero when mounting fails, otherwise the status of get_disk_usage.
 main() {
     check_vdb_is_exist
     repair_fs
     # Usage figures are only collected once the mount succeeded.
     mount_disk && get_disk_usage
 }
 ###muyuan Check the existence of system important directories
 # Log every entry of system_dir that does not exist on the mounted disk.
 # Globals: system_dir, log_file (read).
 check_sys_important_dir() {
     for important_dir in ${system_dir[@]}; do
         [ -e $important_dir ] || echo "This sys dir or link $important_dir does not exist " >> $log_file 2>&1
     done
     echo "Check the existence of system important directories...done"  >> $log_file 2>&1
 }
 ###install dos2unix
 # Convert the passwd and shadow files to Unix line endings, installing
 # dos2unix through yum first when `which` cannot find it.
 # Globals: passfile, shadowfile, log_file (read).
 install_dos2unix() {
     if ! which dos2unix; then
         yum -y install dos2unix >> $log_file 2>&1
         # Short pause after the installation before using the tool.
         sleep 5s
     fi
     dos2unix ${passfile} >> $log_file 2>&1
     dos2unix ${shadowfile} >> $log_file 2>&1
 }
 ###check / 777
 # Reset world-writable (777) top-level entries of the mounted root filesystem.
 # Arguments: $1 - directory to scan (optional, defaults to /mnt).
 # Globals:   log_file (read).
 # Directories are changed recursively to 755, everything else to 644.
 # Symlinks, hidden entries and entries whose name contains "tmp" are left alone.
 check_rootdir_permission() {
     local root_dir=${1:-/mnt}
     local dirfile
     # find replaces the former `ls -l | grep rwxrwx` parsing: it selects only
     # entries with all of rwxrwxrwx set (the old grep also hit 770/775, although
     # the log says 777) and copes with names that contain whitespace.
     while IFS= read -r -d '' dirfile; do
         if [ -d "$dirfile" ]; then
             echo "This dir ${dirfile} permission is 777,chmod to 755" >> $log_file 2>&1
             chmod -R 755 "$dirfile" >> $log_file 2>&1
         else
             echo "This file ${dirfile} permission is 777,chmod to 644" >> $log_file 2>&1
             chmod 644 "$dirfile" >> $log_file 2>&1
         fi
     done < <(find "$root_dir" -mindepth 1 -maxdepth 1 ! -type l ! -name '.*' ! -name '*tmp*' -perm -0777 -print0)
     echo "Check ${root_dir} dir permission 777 ...done " >> $log_file 2>&1
 }
 ###check ssh 777
 # Reset world-writable (777) files in the ssh configuration directory.
 # Arguments: $1 - directory to scan (optional, defaults to /mnt/etc/ssh).
 # Globals:   log_file (read).
 # *.pub, *_config and moduli files become 644, every other file becomes 600.
 check_ssh_permission() {
     local ssh_dir=${1:-/mnt/etc/ssh}
     local sshfile
     # Only regular files are selected. The former `ls -l` parsing also applied
     # chmod 600 to sub-directories (making them untraversable) and built a wrong
     # path for symlinks because it used the link target as the file name.
     while IFS= read -r -d '' sshfile; do
         case "$sshfile" in
             *.pub | *_config | *moduli)
                 echo "Change file ${sshfile} permission to 644" >> $log_file 2>&1
                 chmod 644 "$sshfile" >> $log_file 2>&1
                 ;;
             *)
                 echo "Change file ${sshfile} permission to 600" >> $log_file 2>&1
                 chmod 600 "$sshfile" >> $log_file 2>&1
                 ;;
         esac
     done < <(find "$ssh_dir" -mindepth 1 -maxdepth 1 -type f ! -name '.*' -perm -0777 -print0)
     echo "Check ${ssh_dir} dir permission 777 ...done "  >> $log_file 2>&1
 }
 ###change empty_sshd permission
 # Set /mnt/var/empty/sshd to mode 711 when that directory exists.
 # Globals: log_file (read).
 check_empty_sshd() {
     if [ ! -d /mnt/var/empty/sshd ]; then
         echo "This system does not have dir /var/empty/sshd"  >> $log_file 2>&1
         return
     fi
     echo "change empty_sshd permission to 711"  >> $log_file 2>&1
     chmod 711 /mnt/var/empty/sshd
 }
 ###check selinux
 # Comment out every active SELINUX= line of the SELinux config file and add
 # "SELINUX=disabled" after each commented "#SELINUX=" line.
 # Globals: selinux_config_file, log_file (read).
 disable_selinux() {
     if [ ! -e ${selinux_config_file} ]; then
         echo "this os does not have selinux config file" >>$log_file 2>&1
         return
     fi
     echo "anon selinux and add a new disable line "  >>$log_file 2>&1
     # Both edits run in one sed pass: first comment the line, then append after
     # any line that now starts with "#SELINUX=".
     sed -i -e 's/^SELINUX=/#SELINUX=/g' -e '/^#SELINUX=/a\SELINUX=disabled' ${selinux_config_file}
 }
 ###check sysctl
 # Neutralise risky settings in the sysctl config file of the mounted disk.
 # Globals: sysctl_config_file, sysctl_line_count, log_file (read).
 # A file with at least max_lines lines is commented out completely; otherwise
 # only vm.nr_hugepages and vm.min_free_kbytes are commented out.
 # Returns 0 without touching anything when the file does not exist.
 disable_sysctl() {
     # Single source for the threshold so the log message cannot drift from the
     # test again (the message used to say 100 while the test used 50).
     local -r max_lines=50
     if [ ! -e "${sysctl_config_file}" ]; then
         echo "this os does not have sysctl config file" >>$log_file 2>&1
         return 0
     fi
     if [[ ${sysctl_line_count} -ge ${max_lines} ]]; then
         echo "sysctl.conf line count ge ${max_lines},so anon all line" >>$log_file 2>&1
         sed -i 's/^/#/g' "${sysctl_config_file}"
     else
         echo "check and anon nr_hugepages" >>$log_file 2>&1
         # The dots are escaped so that only the literal key names match.
         sed -i 's/^vm\.nr_hugepages/#vm.nr_hugepages/g' "${sysctl_config_file}"
         echo "check and anon min_free_kbytes" >>$log_file 2>&1
         sed -i 's/^vm\.min_free_kbytes/#vm.min_free_kbytes/g' "${sysctl_config_file}"
     fi
 }
 ###check rc.local
 # Comment out every line of the rc.local file on the mounted disk.
 # Globals: rc_local_file, log_file (read).
 disable_rc_local() {
     if [ ! -e ${rc_local_file} ]; then
         echo "This instance does not have rc_local files" >>$log_file 2>&1
         return
     fi
     echo "anon rc_local line"  >>$log_file 2>&1
     sed -i 's/^/#/g' ${rc_local_file}
 }
 ###check system release
 # Log the distribution id of the mounted system.
 # Globals: log_file (read); os_type_1 or os_type_2 (written).
 # Uses the ID= entry of /mnt/etc/os-release when that file exists, otherwise
 # the first word of the first line of /mnt/etc/issue.net (both lower-cased).
 check_os_type() {
     if [ ! -e /mnt/etc/os-release ]; then
         os_type_2=$(head -n 1 /mnt/etc/issue.net | awk '{print  tolower($1)}')
         echo "may be,this instance is ${os_type_2} " >> $log_file 2>&1
         return
     fi
     os_type_1=$(grep -i "^ID=" /mnt/etc/os-release | awk -F "=" '{print tolower($NF)}' | tr -d '"')
     echo "This instance os_type is ${os_type_1}"  >> $log_file 2>&1
 }
 ###Check passwd 
 # Make sure the essential system accounts exist in the passwd file.
 # Globals: passfile, passbakfile, system_user, log_file (read).
 # - passwd present and non-empty: every account of system_user that is missing
 #   is copied back from the backup file when the backup has it.
 # - passwd missing or empty but backup usable: passwd is replaced by the backup.
 # - otherwise only a log message is written.
 check_system_username() {
     local c_user
     if [ -e "${passfile}" ] && [ -s "${passfile}" ]; then
         echo "passwd file is not zero,start check username"  >> $log_file 2>&1
         echo -e "##########\n#please note \n#centos & redhat does not have messagebus user  \n#ubuntu & debian does not have dbus & polkitd user\n##########" >> $log_file 2>&1
         for c_user in "${system_user[@]}"; do
             # Match the complete name field ("name:"); a bare prefix match
             # would accept e.g. "rootless" as proof that "root" exists.
             if grep -qs "^${c_user}:" "${passfile}"; then
                 echo "This account ${c_user} is ok" >> $log_file 2>&1
                 continue
             fi
             echo "This account ${c_user} not in ${passfile}" >> $log_file 2>&1
             if grep "^${c_user}:" "${passbakfile}" >> "${passfile}"; then
                 echo "${passbakfile} has this account ${c_user},Restore the account to ${passfile}" >> $log_file 2>&1
             else
                 echo "${passbakfile} does not have this account ${c_user},can not restore from the bakfile" >> $log_file 2>&1
             fi
         done
     elif [ -e "${passbakfile}" ] && [ -s "${passbakfile}" ]; then
         echo "File ${passfile} size is zero or not exists，bcakup to passwd.bak and use ${passbakfile} restore" >> $log_file 2>&1
         # Keep the old file only when there is one to keep.
         if [ -e "${passfile}" ]; then
             mv "${passfile}" "${passfile}.bak"
         fi
         cp "${passbakfile}" "${passfile}"
     else
         echo "The ${passbakfile} file not exists or size is zero" >> $log_file 2>&1
     fi
 }
 ###check shadow
 # Replace the root entries of passwd and shadow with a root account that has
 # an empty password field.
 # Globals: passfile, shadowfile, log_file (read).
 # The existing root lines are kept as comments; fresh entries are appended.
 check_root_pass_shadow() {
     echo "anon root in passwd & shadow and add new root no passwd " >> $log_file 2>&1
     # Anchor on "root:" so that accounts whose name merely starts with "root"
     # are not commented out as well.
     sed -i 's/^root:/#root:/g' "${passfile}"
     sed -i 's/^root:/#root:/g' "${shadowfile}"
     echo "root:x:0:0:root:/root:/bin/bash" >> "${passfile}"
     echo "root::18340:0:99999:7:::" >> "${shadowfile}"
 }
 ###shibin check uuid fstab
 # Comment out the data-disk entries of the fstab on the mounted disk.
 # Globals: fstab, log_file (read).
 # A line is commented when it is non-empty, contains no "#", does not mention
 # swap and has no stand-alone "/" field (i.e. it is not the root entry).
 # Returns: 0 on success, 1 when the fstab is missing or cannot be rewritten.
 annotation_datadisk() {
     local line tmp_fstab rc
     echo "In fun [annotation_datadisk]: " >> $log_file 2>&1
     if [ ! -f "$fstab" ]; then
         echo "fstab file $fstab does not exist" >> $log_file 2>&1
         return 1
     fi
     tmp_fstab=$(mktemp) || return 1
     # The file is rewritten line by line. The former approach fed every line
     # back into sed as a regular expression, which failed with an empty
     # pattern when there was nothing to comment out and could mis-match lines
     # containing regex metacharacters.
     while IFS= read -r line || [ -n "$line" ]; do
         if [ -n "$line" ] && [[ "$line" != *"#"* ]] && [[ "$line" != *swap* ]] \
             && ! grep -qw / <<< "$line"; then
             echo "blanking [ $line ]" >> $log_file 2>&1
             printf '#%s\n' "$line"
         else
             printf '%s\n' "$line"
         fi
     done < "$fstab" > "$tmp_fstab"
     # Write back through the existing file instead of replacing it.
     cat "$tmp_fstab" > "$fstab"
     rc=$?
     rm -f "$tmp_fstab"
     return $rc
 }
 # Point the root ("/") entry of the fstab on the mounted disk at /dev/vda1.
 # Globals: fstab, log_file (read).
 # Every non-comment line whose mount-point field is "/" is replaced by a fixed
 # /dev/vda1 entry. Comment lines are left untouched.
 # NOTE(review): the replacement line hard-codes ext4 although repair_fs also
 # handles xfs disks - confirm this is intended for xfs root filesystems.
 # Returns: 0 when nothing had to be changed or the rewrite succeeded.
 replace_sysdisk_by_diskname() {
     local line_r="/dev/vda1       /       ext4    defaults    1 1"
     local sys_disk tmp_fstab rc
     echo "In fun [replace_sysdisk_by_diskname]: " >> $log_file 2>&1
     # Select by field instead of `grep -w /`: the grep also matched comments
     # such as "# / was on /dev/sda1 during installation", which produced a
     # multi-line sed pattern and made the substitution fail.
     sys_disk=$(awk '$1 !~ /^#/ && $2 == "/"' "$fstab")
     if [ -z "$sys_disk" ]; then
         return 0
     fi
     echo "Setting [ $sys_disk ] as [ $line_r ]" >> $log_file 2>&1
     tmp_fstab=$(mktemp) || return 1
     awk -v repl="$line_r" '$1 !~ /^#/ && $2 == "/" { print repl; next } { print }' "$fstab" > "$tmp_fstab" \
         && cat "$tmp_fstab" > "$fstab"
     rc=$?
     rm -f "$tmp_fstab"
     return $rc
 }
 # Replace the UUID reference of the repair disk in the grub configuration
 # with /dev/vda1.
 # Globals: grub2_f, grub_f, datetime, log_file (read); uuid (written).
 # grub2's config is preferred; the legacy grub config is used when grub2's is
 # absent. A timestamped copy of the file is made before it is edited.
 # Returns 0 without editing anything when blkid reports no UUID for vdb.
 reset_grub_by_diskname() {
     echo "In fun [reset_grub_by_diskname]: " >> $log_file 2>&1
     # Pick the UUID= tag explicitly: the second blkid field can be LABEL=...,
     # and only the first matching device is used so the value stays one line.
     uuid=$(blkid | grep vdb | awk '{ for (i = 2; i <= NF; i++) if ($i ~ /^UUID=/) { gsub(/"/, "", $i); print $i; exit } }')
     if [ -z "$uuid" ]; then
         # An empty pattern would make the sed expression below invalid.
         echo "No UUID found for vdb in blkid output,skip grub update" >> $log_file 2>&1
         return 0
     fi
     if [ -f ${grub2_f} ]; then
         echo "cp ${grub2_f} ${grub2_f}_${datetime}" >> $log_file 2>&1
         cp ${grub2_f} ${grub2_f}_${datetime}
         sed -i "s#$uuid#/dev/vda1#g" ${grub2_f}
     elif [ -f ${grub_f} ]; then
         cp ${grub_f} ${grub_f}_${datetime}
         sed -i "s#$uuid#/dev/vda1#g" ${grub_f}
     fi
 }
 # Keep a timestamped copy of the fstab before it is modified.
 # Globals: fstab, datetime, log_file (read).
 # Returns: exit status of cp.
 backup_fstab() {
     {
         echo "In fun [backup_fstab]"
         echo " cp ${fstab} ${fstab}_${datetime}"
     } >> $log_file 2>&1
     cp ${fstab} ${fstab}_${datetime}
 }
 ###nvshen check sshdcofig
 # Relax the sshd configuration of the mounted disk so root can log in again.
 # Globals: sshdconfig_file, log_file (read).
 # Active PermitRootLogin lines are set to yes, active Allow*/Deny* user and
 # group lines are reduced to a commented directive name, and active
 # LoginGraceTime lines are set to 100.
 check_sshdconfig() {
     local directive
     echo "check PermitRootLogin and change to yes" >>$log_file 2>&1
     sed -i 's/^PermitRootLogin.*$/PermitRootLogin yes/' ${sshdconfig_file}
     echo "check AllowUsers AllowGroups DenyUsers DenyGroups and Comment out" >>$log_file 2>&1
     for directive in AllowUsers AllowGroups DenyUsers DenyGroups; do
         sed -i "s/^${directive}.*\$/#${directive}/" ${sshdconfig_file}
     done
     echo "check LoginGraceTime and cahnge to 100s" >>$log_file 2>&1
     sed -i 's/^LoginGraceTime.*$/LoginGraceTime 100/' ${sshdconfig_file}
 }
 ###start check
 # Repair and mount the disk first; every later step works on files below /mnt.
 echo "mount bad sysdisk to mnt" >> $log_file 2>&1
 main
 # Number of lines in the mounted sysctl.conf; read by disable_sysctl to decide
 # between commenting the whole file and commenting single keys.
 sysctl_line_count=$(awk 'END{print NR}' /mnt/etc/sysctl.conf)
 echo "Check sysdisk mount ready" >> $log_file 2>&1
 # The repair steps only run when /mnt really is a mount point.
 mountpoint /mnt >> $log_file 2>&1
 if [ $? -ne 0 ]
    then
    echo "The /mnt dir  does not ready mount" >> $log_file 2>&1
    else
    echo "The /mnt dir mount ready ,start check..." >> $log_file 2>&1
    # Each step below logs a headline, runs one repair function and then
    # pauses for three seconds before the next step.
    echo "Start...Check the existence of system important directories" >> $log_file 2>&1
    check_sys_important_dir >>$log_file 2>&1
    sleep 3s
    echo "Check /mnt dir permission 777 and change it " >> $log_file 2>&1
    check_rootdir_permission >>$log_file 2>&1
    sleep 3s
    echo "Check /mnt/etc/ssh dir permission 777 and change it" >> $log_file 2>&1
    check_ssh_permission >>$log_file 2>&1
    sleep 3s
    # fstab is backed up before the two functions that rewrite it.
    echo "backup  fstab " >> $log_file 2>&1
    backup_fstab >>$log_file 2>&1
    sleep 3s
    echo "replace fstab sysdisk  " >> $log_file 2>&1
    replace_sysdisk_by_diskname >>$log_file 2>&1
    sleep 3s
    echo "anon datadisk in fstab " >> $log_file 2>&1
    annotation_datadisk >>$log_file 2>&1
    sleep 3s
    echo "replace uuid in grub " >> $log_file 2>&1
    reset_grub_by_diskname >>$log_file 2>&1
    sleep 3s
    echo "check and disable selinux"  >>$log_file 2>&1
    disable_selinux >>$log_file 2>&1
    sleep 3s
    echo "check and disable sysctl var set"  >>$log_file 2>&1
    disable_sysctl >>$log_file 2>&1
    sleep 3s
    # From here on the functions are called without an outer redirection; they
    # write to the log file themselves.
    echo "anon rc_local all line"  >>$log_file 2>&1
    disable_rc_local
    sleep 3s
    echo "check sshd_config"  >>$log_file 2>&1
    check_sshdconfig
    sleep 3s
    echo "check empty sshd dir and change dir permission to 711" >>$log_file 2>&1
    check_empty_sshd
    sleep 3s
    echo "Check instance os_type" >>$log_file 2>&1
    check_os_type
    sleep 3s
    # dos2unix runs before the passwd/shadow checks that follow.
    echo "install dos2unix " >>$log_file 2>&1
    install_dos2unix
    sleep 3s
    echo "Check passwd file" >>$log_file 2>&1
    check_system_username
    sleep 3s
    echo "Check shadow file" >>$log_file 2>&1
    check_root_pass_shadow
 fi
 # script end------------
 sleep 30s
 # Upload the log to OSS through the shared helper instead of repeating its
 # download/chmod/upload commands here, so both upload paths stay identical.
 upload_log
--- a/shell/template.sh
+++ b/shell/template.sh
@ -14,11 +14,23 @@ set -o nounset          # Disallow expansion of unset variables
 set -o pipefail         # Use last non-zero exit code in a pipeline
 ######################################################################################################
 # environment configuration
 ######################################################################################################
 # Tag value for this script.
 TAG="CMD"
 # Logs live in a "logs" directory next to this script.
 LOG_PATH="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/logs"
 # One log file per day.
 LOG_FILE="${LOG_PATH}/example_$(date +"%Y%m%d").log"
 HIDE_LOG=true
 ######################################################################################################
 # function
 ######################################################################################################
 function log() {
    [ ! -d "$LOG_PATH" ] && mkdir -p $LOG_PATH
    if [ $HIDE_LOG ]; then
@ -55,4 +67,9 @@ function main() {
 }
 ######################################################################################################
 # main 
 ######################################################################################################
 main "${@}"
--- a/shell/utils.sh
+++ b/shell/utils.sh
@ -0,0 +1,102 @@
 # Download a URL to a file, trying up to five times.
 # Arguments: $1 - URL to fetch
 #            $2 - destination file (removed first if it exists)
 # Outputs:   one progress line per attempt on stdout
 # Returns:   0 as soon as one attempt succeeds, 1 after five failures
 util::download_file() {
   local -r url=$1
   local -r destination_file=$2
   local i
   # "2&>" is parsed as an extra argument "2" followed by "&>", so the old
   # command also tried to delete a file named "2" in the current directory.
   rm -f -- "${destination_file}" 2> /dev/null || true
   for ((i = 1; i <= 5; i++)); do
     if curl -fsSL --retry 3 --keepalive-time 2 "${url}" -o "${destination_file}"; then
       echo "Downloading ${url} succeed"
       return 0
     fi
     echo "Downloading ${url} failed. $((5 - i)) retries left."
     sleep 1
   done
   return 1
 }
 # Re-run a shell command until it succeeds or the time budget is used up.
 # Example:   util::wait_for_success 120 5 "kubectl get nodes|grep localhost"
 # Arguments: $1 - total wait time
 #            $2 - sleep time between attempts (also subtracted from the budget)
 #            $3 - shell command, evaluated with eval in the current shell
 # Returns:   0 as soon as the command exits with status 0, 1 when the budget
 #            reaches zero first.
 util::wait_for_success(){
   local wait_time="$1"
   local sleep_time="$2"
   local cmd="$3"
   while [ "$wait_time" -gt 0 ]; do
     eval "$cmd" && return 0
     sleep "$sleep_time"
     wait_time=$((wait_time-sleep_time))
   done
   return 1
 }
 # Print the host operating system as "darwin" or "linux".
 # For any other `uname -s` value an error line is printed and the shell
 # exits with status 1.
 util::host_os() {
   case "$(uname -s)" in
     Darwin)
       echo "darwin"
       ;;
     Linux)
       echo "linux"
       ;;
     *)
       echo "Unsupported host OS.  Must be Linux or Mac OS X."
       exit 1
       ;;
   esac
 }
 # Print the host CPU architecture in normalised form
 # (amd64, arm64, arm, x86, s390x or ppc64le).
 # For any other `uname -m` value an error line is printed and the shell
 # exits with status 1.
 util::host_arch() {
   # Pattern order matters: the 64-bit spellings must be tested before the
   # broader arm* and i?86* patterns.
   case "$(uname -m)" in
     x86_64* | i?86_64* | amd64*)
       echo "amd64"
       ;;
     aarch64* | arm64*)
       echo "arm64"
       ;;
     arm*)
       echo "arm"
       ;;
     i?86*)
       echo "x86"
       ;;
     s390x*)
       echo "s390x"
       ;;
     ppc64le*)
       echo "ppc64le"
       ;;
     *)
       echo "Unsupported host arch. Must be x86_64, 386, arm, arm64, s390x or ppc64le."
       exit 1
       ;;
   esac
 }
 util::md5() {
  if which md5 >/dev/null 2>&1; then
    md5 -q "$1"
  else
    md5sum "$1" | awk '{ print $1 }'
  fi
 }