add

5 years ago · 2007428466
9 changed files with 1014 additions and 80 deletions
--- a/bat/vmware-batch.bat
+++ b/bat/vmware-batch.bat
@ -9,17 +9,27 @@ IF EXIST "%PROGRAMFILES(X86)%\VMWare\VMWare Workstation\vmrun.exe" SET VMwarePat
				@@ -9,17 +9,27 @@ IF EXIST "%PROGRAMFILES(X86)%\VMWare\VMWare Workstation\vmrun.exe" SET VMwarePat
 IF EXIST "%PROGRAMFILES%\VMware\VMware VIX\vmrun.exe" SET VMwarePath=%PROGRAMFILES%\VMware\VMware VIX
 IF EXIST "%PROGRAMFILES(X86)%\VMware\VMware VIX\vmrun.exe" SET VMRUN=%PROGRAMFILES(X86)%\VMware\VMware VIX

-::±äÁ¿ÉèÖÃ
-::set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation"
+:: VMware安装地址
+:: set VMwarePath="C:\Program Files (x86)\VMware\VMware Workstation"
+:: 虚拟机存放目录
 set VMpath="D:\Virtual Machines"
-set VMname=CentOS_7.4_x64_node
+:: 虚拟机名称
+set VMname=CentOS_7.8_x64_node
+:: 虚拟机快照名称
 set VMSnapshot=init
+:: 新建虚拟机数目
 set VMcount=5
-set VMowa="D:\vmware owa\CentOS_7.4_x64.ova"
+:: 虚拟机owa模板位置
+set VMowa="D:\vmware owa\CentOS_7.8_x64_base.ova"
+:: 模板系统用户名
 set VMuser=root
+:: 模板系统密码
 set VMpass=123456
-set VMipStart=10
+:: 虚拟机网络
 set VMnetwork=192.168.77
+:: 虚拟机ip开始地址
+set VMipStart=130
+


 :init
@ -75,6 +85,7 @@ ping /n 2 127.1>nul
				@@ -75,6 +85,7 @@ ping /n 2 127.1>nul
 set /p a=^><nul
 )

+cls
 goto init

 :oneKey
@ -109,19 +120,22 @@ echo 
				@@ -109,19 +120,22 @@ echo
 cd OVFTool
 ovftool --name=!VMname!%%a !VMowa! !VMpath!
 cd ..
+echo.
 echo 启动虚拟机: !VMname!%%a
 vmrun -T ws start !VMpath!\!VMname!%%a\!VMname!%%a.vmx
 )

+echo.
 echo 设置ip:
 for /l %%a in (1,1,%VMcount%) do (
 set name=!VMname!%%a
 set /a num=%VMipStart%+%%a-1
 set ip=!VMnetwork!.!num!
 echo !name!:!ip!
-vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/^IPADDR=.*/IPADDR=!ip!/g' /etc/sysconfig/network-scripts/ifcfg-ens33;/etc/init.d/network restart || sudo sed -i 's/^address.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
+vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo echo 'node!num!' > /etc/hostname; sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address .*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
 )

+echo.
 echo 创建快照:
 for /l %%a in (1,1,%VMcount%) do (
 set name=!VMname!%%a
@ -308,7 +322,7 @@ set name=!VMname!%%a
				@@ -308,7 +322,7 @@ set name=!VMname!%%a
 set /a num=%VMipStart%+%%a-1
 set ip=!VMnetwork!.!num!
 echo !name!:!ip!
-vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/^IPADDR=.*/IPADDR=!ip!/g' /etc/sysconfig/network-scripts/ifcfg-ens33;/etc/init.d/network restart || sudo sed -i 's/^address.*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
+vmrun -T ws -gu !VMuser! -gp !VMpass! runProgramInGuest !VMpath!\!name!\!name!.vmx /bin/bash -c "sudo sed -i 's/IPADDR=.*$/IPADDR="!ip!"/g' /etc/sysconfig/network-scripts/ifcfg-e*;/etc/init.d/network restart || sudo sed -i 's/address .*$/address !ip!/g' /etc/network/interfaces;/etc/init.d/network restart" nogui
 )
 goto wait

--- a/python/supervisor_healthCheck.py
+++ b/python/supervisor_healthCheck.py
@ -4,10 +4,11 @@
				@@ -4,10 +4,11 @@
 # @Time    : 2020-06-05
 # @Author  : lework
 # @Desc    : 针对supervisor的应用进行健康检查
-# @Version : 1.5
+# @Version : 1.6


 import os
+import re
 import sys
 import time
 import json
@ -89,62 +90,39 @@ def get_proc_cpu(pid):
				@@ -89,62 +90,39 @@ def get_proc_cpu(pid):
        return None
    return cpu_utilization

-
-def get_proc_rss(pid, cumulative=False):
+def get_proc_mem(pid, type="rss"):
    """
    获取进程内存使用
    :param pid:
-    :param cumulative:
+    :param type:
    :return:
    """
-    pscommand = 'ps -orss= -p %s'
-    pstreecommand = 'ps ax -o "pid= ppid= rss="'
-    ProcInfo = namedtuple('ProcInfo', ['pid', 'ppid', 'rss'])
-
-    def find_children(parent_pid, procs):
-        # 找出进程的子进程信息
-        children = []
-        for proc in procs:
-            pid, ppid, rss = proc
-            if ppid == parent_pid:
-                children.append(proc)
-                children.extend(find_children(pid, procs))
-        return children
-
-    if cumulative:
-        # 统计进程的子进程rss
-        _, data, _ = shell(pstreecommand)
-        data = data.strip()
-
-        procs = []
-        for line in data.splitlines():
-            p_pid, p_ppid, p_rss = map(int, line.split())
-            procs.append(ProcInfo(pid=p_pid, ppid=p_ppid, rss=p_rss))
-
-        # 计算rss
-        try:
-            parent_proc = [p for p in procs if p.pid == pid][0]
-            children = find_children(pid, procs)
-            tree = [parent_proc] + children
-            rss = sum(map(int, [p.rss for p in tree]))
-        except (ValueError, IndexError):
-            # 计算错误时，返回None
-            return None
-
-    else:
-        _, data, _ = shell(pscommand % pid)
-        if not data:
-            # 未获取到数据值，或者没有此pid信息
-            return None
-        try:
-            rss = data.strip()
-            rss = int(rss)
-        except ValueError:
-            # 获取的结果不包含数据，或者无法识别rss
-            return None

-    rss = rss / 1024  # rss 的单位是 KB， 这里返回MB单位
-    return rss
+    # Per-mapping memory counters of the process are read from /proc/<pid>/smaps.
+    smaps_file = "/proc/%s/smaps" % pid
+    if not os.path.exists(smaps_file):
+        print("[Error] not found %s" % smaps_file)
+        return None
+
+    try:
+        with open(smaps_file) as f:
+            smaps_data = f.read()
+    except Exception as e:
+        print("[Error] %s" % e)
+        return None
+
+    # One field pattern per supported metric; "uss" sums every Private_* field.
+    # The patterns are text, not bytes, because the file is opened in text
+    # mode: a bytes pattern raises TypeError against str on Python 3.
+    patterns = {
+        "rss": r"^Rss:\s+(\d+)",
+        "pss": r"^Pss:\s+(\d+)",
+        "uss": r"^Private_\w+:\s+(\d+)",
+    }
+    pattern = patterns.get(type)
+    if pattern is None:
+        # An unknown metric name used to fall through to an unbound variable;
+        # report it the same way as the other failure paths.
+        print("[Error] unsupported mem type %s" % type)
+        return None
+
+    data = sum(map(int, re.findall(pattern, smaps_data, re.M)))
+
+    data = data / 1024  # smaps values are in kB; the result is returned in MB
+    return data


 class WorkerThread(threading.Thread):
@ -188,6 +166,7 @@ class HealthCheck(object):
				@@ -188,6 +166,7 @@ class HealthCheck(object):
        if 'config' in config:
            self.mail_config = config['config'].get('mail')
            self.wechat_config = config['config'].get('wechat')
+            self.dingding_config = config['config'].get('dingding')
            self.supervisord_url = config['config'].get('supervisordUrl', self.supervisord_url)
            self.supervisord_user = config['config'].get('supervisordUser', None)
            self.supervisord_pass = config['config'].get('supervisordPass', None)
@ -196,7 +175,7 @@ class HealthCheck(object):
				@@ -196,7 +175,7 @@ class HealthCheck(object):
        self.program_config = config

        # 只保留通知action
-        self.notice_action = ['email', 'wechat']
+        self.notice_action = ['email', 'wechat', 'dingding']

        self.periodSeconds = 5
        self.failureThreshold = 3
@ -204,8 +183,8 @@ class HealthCheck(object):
				@@ -204,8 +183,8 @@ class HealthCheck(object):
        self.initialDelaySeconds = 1
        self.sendResolved = False

-        self.max_rss = 1024
-        self.cumulative = False
+        self.mem_type = 'rss'
+        self.max_mem = 1024
        self.max_cpu = 90

    def get_supervisord_conn(self):
@ -359,7 +338,7 @@ class HealthCheck(object):
				@@ -359,7 +338,7 @@ class HealthCheck(object):
                            check_state[program]['failure'] != 0 and check_state[program]['failure'] % (
                            (periodSeconds + initialDelaySeconds) * 2) == 0):
                        action_param = {
-						    'config': config,
+                            'config': config,
                            'action_type': action_type,
                            'check_status': check_status,
                            'msg': check_result.get('msg', '')
@ -461,11 +440,11 @@ class HealthCheck(object):
				@@ -461,11 +440,11 @@ class HealthCheck(object):
        :return: dict
        """
        program = config.get('program')
-        max_rss = config.get('maxRss', self.max_rss)
-        cumulative = config.get('cumulative', self.cumulative)
+        max_mem = config.get('maxMem', self.max_mem)
+        mem_type = config.get('memType', self.mem_type)
        pid_get = config.get('pidGet', 'supervisor')
        pid_file = config.get('pidFile', )
-        check_info = 'max_rss:%sMB cumulative:%s' % (max_rss, cumulative)
+        check_info = 'max_mem:%sMB mem_type:%s' % (max_mem, mem_type)

        pid, err = self.get_pid(program, pid_get, pid_file)
        if pid == 0:
@ -473,13 +452,13 @@ class HealthCheck(object):
				@@ -473,13 +452,13 @@ class HealthCheck(object):
            return {'status': 'failure',
                    'msg': '[mem_check] program not starting, message: %s.' % err,
                    'info': check_info}
-        now_rss = get_proc_rss(pid, cumulative)
-        check_info = '%s now_rss:%sMB pid:%s' % (check_info, now_rss, pid)
-        if now_rss >= int(max_rss):
-            return {'status': 'failure', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss),
+        now_mem = get_proc_mem(pid, mem_type)
+        check_info = '%s now_mem:%sMB pid:%s' % (check_info, now_mem, pid)
+        if now_mem >= int(max_mem):
+            return {'status': 'failure', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem),
                    'info': check_info}

-        return {'status': 'success', 'msg': '[mem_check] max_rss(%sMB) now_rss(%sMB)' % (max_rss, now_rss),
+        return {'status': 'success', 'msg': '[mem_check] max_mem(%sMB) now_mem(%sMB)' % (max_mem, now_mem),
                'info': check_info}

    def cpu_check(self, config):
@ -522,7 +501,7 @@ class HealthCheck(object):
				@@ -522,7 +501,7 @@ class HealthCheck(object):
        msg = args.get('msg')
        check_status = args.get('check_status')
        config = args.get('config')
-		
+
        self.log(program, '[Action: %s]', action_type)
        action_list = action_type.split(',')

@ -544,6 +523,8 @@ class HealthCheck(object):
				@@ -544,6 +523,8 @@ class HealthCheck(object):
            self.action_email(program, action_type, msg, check_status)
        if 'wechat' in action_list and self.wechat_config:
            self.action_wechat(program, action_type, msg, check_status)
+        if 'dingding' in action_list and self.dingding_config:
+            self.action_dingding(program, action_type, msg, check_status)

    def action_supervisor_restart(self, program):
        """
@ -601,19 +582,19 @@ class HealthCheck(object):
				@@ -601,19 +582,19 @@ class HealthCheck(object):
            self.log(program, "[Action: exec] result %s", result)

        return result
-		
+
    def action_kill(self, program, pid):
        """
        杀死进程
        :param program:
        :param pid:
        :return:
-        """ 
+        """
        result = 'success'
-		
+
        if int(pid) < 3:
-            return 'Failed to kill %s, pid: %s '% (program, pid)
-		  
+            return 'Failed to kill %s, pid: %s ' % (program, pid)
+
        cmd = "kill -9 %s" % pid
        exitcode, stdout, stderr = shell(cmd)

@ -785,6 +766,51 @@ class HealthCheck(object):
				@@ -785,6 +766,51 @@ class HealthCheck(object):
        self.log(program, '[Action: wechat] send success')
        return True

+    def action_dingding(self, program, action_type, msg, check_status):
+        """
+        Send a markdown notification through the DingTalk robot webhook
+        :param program: program name, used in the title, the text and the log lines
+        :param action_type: configured action string (not used to build the message)
+        :param msg: check message placed in the notification text
+        :param check_status: 'success' selects the success title, anything else the failure title
+        :return: True when the webhook answers with errcode 0, otherwise False
+        """
+        curr_dt = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+        hostname = platform.node().split('.')[0]
+        system_platform = platform.platform()
+
+        host = "oapi.dingtalk.com"
+        access_token = self.dingding_config.get('access_token')
+        # The placeholder name must match the keyword passed to format().
+        send_url = '/robot/send?access_token={access_token}'.format(access_token=access_token)
+
+        headers = {
+            'Content-Type': 'application/json'
+        }
+
+        if check_status == 'success':
+            title = "[%s] Health check successful" % program
+        else:
+            title = "[%s] Health check failed" % program
+
+        send_data = {"msgtype": "markdown",
+                     "markdown": {
+                         "title": title,
+                         "text": "#### 详情信息: \n> Program：%s \n\n> DataTime: %s \n\n> Hostname: %s \n\n> Platfrom: %s \n\n> Msg：%s" % (
+                         program, curr_dt, hostname, system_platform, msg)
+                     }
+                     }
+
+        # Bound before the try block so the finally clause can always test it,
+        # even when creating the connection itself raises.
+        httpClient = None
+        try:
+            httpClient = httplib.HTTPSConnection(host, timeout=10)
+            httpClient.request("POST", send_url, json.dumps(send_data), headers=headers)
+            response = httpClient.getresponse()
+            result = json.loads(response.read())
+            if result['errcode'] != 0:
+                self.log(program, '[Action: dingding] send faild %s' % result)
+                return False
+        except Exception as e:
+            self.log(program, '[Action: dingding] send error %s' % e)
+            return False
+        finally:
+            if httpClient:
+                httpClient.close()
+
+        self.log(program, '[Action: dingding] send success')
+        return True
+
    def start(self):
        """
        启动检测
@ -804,10 +830,10 @@ class HealthCheck(object):
				@@ -804,10 +830,10 @@ class HealthCheck(object):
        for t in threads:
            t.setDaemon(True)
            t.start()
-                
+
        while 1:
            time.sleep(0.1)
-            for i,t in enumerate(threads):
+            for i, t in enumerate(threads):
                if not t.isAlive():
                    thread_name = t.getName()
                    self.log('ERROR', 'Exception in %s (catch by main): %s' % (thread_name, t.get_exception()))
@ -825,6 +851,7 @@ if __name__ == '__main__':
				@@ -825,6 +851,7 @@ if __name__ == '__main__':
        print("Exit check!")
        sys.exit(0)

+
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGTERM, sig_handler)
    signal.signal(signal.SIGQUIT, sig_handler)
@ -850,12 +877,14 @@ config:                                          # 脚本配置名称,请勿更
				@@ -850,12 +877,14 @@ config:                                          # 脚本配置名称,请勿更
 #    touser: 
 #    toparty: 
 #    totag: 
+#  dingding:                                     # 钉钉通知配置
+#    access_token:

 # 内存方式监控
 cat1:                     # supervisor中配置的program名称
  type: mem               # 检查类型: http,tcp,mem,cpu  默认: http
-  maxRss: 1024            # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024
-  cumulative: True        # 是否统计子进程的内存, 默认: False
+  maxMem: 1024            # 内存阈值, 超过则为检测失败. 单位MB, 默认: 1024
+  memType: rss            # 内存使用分类：rss, pss, uss 默认：rss
  pidGet: supervisor      # 获取pid的方式: supervisor,name,file, 选择name时,按program名称搜索pid,选择file时,需指定pidFile 默认: supervisor
  pidFile: /var/run/t.pid # 指定pid文件的路径, 只在pidGet为file的时候有用
  periodSeconds: 10       # 检查的频率(以秒为单位), 默认: 5
--- a/shell/get_proc_mem.sh
+++ b/shell/get_proc_mem.sh
@ -0,0 +1,89 @@
				@@ -0,0 +1,89 @@
+#!/usr/bin/env bash
+
+
+pid=$1
+retries="${2:-0}"
+wait="${3:-1}"
+pid_smaps=""
+
+
+# Sum one smaps field over all mappings held in ${pid_smaps}.
+#   $1  exact field name without the colon, e.g. "Swap"
+# Prints the total, or 0 when the field never appears, so callers can use the
+# result in arithmetic without a syntax error.
+function smaps_sum() {
+  printf "%s" "${pid_smaps}" | awk -v field="$1:" '$1 == field {sum += $2} END {print sum + 0}'
+}
+
+# Read /proc/${pid}/smaps and /proc/meminfo and store the values in the
+# globals mem_total, mem_free, mem_available, size, rss, pss, shared_clean,
+# shared_dirty, private_clean, private_dirty, swap and swap_pss.
+# Prints a usage message and exits with status 1 when the smaps file is missing.
+function get_meminfo() {
+  if [ ! -f "/proc/${pid}/smaps" ]; then
+    echo "[Error] not found $pid smaps file."
+    echo "Usage: bash $0 Pid Retries Wait, like: bash $0 1234 100 5"
+    exit 1
+  fi
+  pid_smaps=$(cat "/proc/${pid}/smaps")
+
+  mem_info=$(cat /proc/meminfo)
+
+  mem_total=$(printf "%s" "${mem_info}" | awk '/^MemTotal:/ {print $2}')
+  mem_free=$(printf "%s" "${mem_info}" | awk '/^MemFree:/ {print $2}')
+  mem_available=$(printf "%s" "${mem_info}" | awk '/^MemAvailable:/ {print $2}')
+
+  # Field names are matched exactly: a prefix match on "Swap" would also add
+  # every "SwapPss" line to the swap total.
+  size=$(smaps_sum Size)
+  rss=$(smaps_sum Rss)
+  pss=$(smaps_sum Pss)
+
+  shared_clean=$(smaps_sum Shared_Clean)
+  shared_dirty=$(smaps_sum Shared_Dirty)
+  private_clean=$(smaps_sum Private_Clean)
+  private_dirty=$(smaps_sum Private_Dirty)
+  swap=$(smaps_sum Swap)
+  swap_pss=$(smaps_sum SwapPss)
+}
+
+# Print one summary line (values converted from kB to MB) per sample, repeated
+# ${retries} times with a pause of ${wait} seconds after each sample.
+count=0
+while [ $count -lt $retries ] ; do
+  get_meminfo
+  echo "Date: $(date +'%Y-%m-%d %T') MemTotal: $((mem_total/1024))MB MemFree: $((mem_free/1024))MB MemAvailable: $((mem_available/1024))MB RSS: $((${rss}/1024))MB PSS: $((${pss}/1024))MB USS: $(( (${private_clean} + ${private_dirty}) /1024 ))MB"
+  sleep $wait
+  count=$(($count + 1))
+done
+
+
+# Take a final sample and print the field glossary followed by the detailed
+# report. The here-document is unquoted, so the ${...} and $(...) parts are
+# expanded when the report is printed.
+# - Cmd: NUL separators in /proc/<pid>/cmdline are turned into spaces so the
+#   arguments stay readable instead of being glued together.
+# - User: taken from ps; "id" does not read standard input, so redirecting
+#   loginuid into it only printed the name of the user running this script.
+get_meminfo
+
+cat << EOF
+
+# OS meminfo
+MemTotal：内存总数
+MemFree：空闲内存数
+MemAvailable：可用内存数,包括cache/buffer、slab
+
+# Process smaps
+Size：表示该映射区域在虚拟内存空间中的大小。
+Rss： 表示该映射区域当前在物理内存中占用了多少空间
+      Rss=Shared_Clean+Shared_Dirty+Private_Clean+Private_Dirty
+Pss： 该虚拟内存区域平摊计算后使用的物理内存大小(有些内存会和其他进程共享，例如mmap进来的)
+      实际上包含下面private_clean+private_dirty，和按比例均分的shared_clean、shared_dirty。
+Uss:  Unique Set Size 进程独自占用的物理内存（不包含共享库占用的内存）
+      USS=Private_Clean+Private_Dirty
+Shared_Clean：  和其他进程共享的未被改写的page的大小
+Shared_Dirty：  和其他进程共享的被改写的page的大小
+Private_Clean： 未被改写的私有页面的大小。
+Private_Dirty： 已被改写的私有页面的大小。
+Swap：   存在于交换分区的数据大小(如果物理内存有限，可能存在一部分在主存一部分在交换分区)
+SwapPss: 计算逻辑就跟pss一样，只不过针对的是交换分区的内存。
+
+Pid: ${pid}
+Cmd: $(tr '\0' ' ' < /proc/${pid}/cmdline | cut -c1-80)
+User: $(ps -o user= -p "${pid}")
+Threads: $(awk '/Threads:/ {print $2}' /proc/${pid}/status)
+
+File: /proc/${pid}/smaps
+
+# Os meminfo
+MemTotal:              ${mem_total} KB
+MemFree:               ${mem_free} KB
+MemAvailable:          ${mem_available} KB
+
+# Process smaps
+Size:                  ${size} KB
+RSS:                   ${rss} kB
+PSS:                   ${pss} kB
+Shared_Clean:          ${shared_clean} kB
+Shared_Dirty:          ${shared_dirty} kB
+Private_Clean:         ${private_clean} kB
+Private_Dirty:         ${private_dirty} kB
+Swap:                  ${swap} kB
+SwapPss:               ${swap_pss} kB
+
+USS:                   ${private_clean} + ${private_dirty} = $(( ${private_clean} + ${private_dirty} )) kB
+EOF
--- a/shell/keystore.sh
+++ b/shell/keystore.sh
@ -0,0 +1,75 @@
				@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Create a CA with openssl, then build server and client JKS keystores
+# (aliases kafka-server / kafka-client) whose certificates are signed by that
+# CA, plus truststores holding the CA certificate. All files are written to
+# $CERT_OUTPUT_PATH; the intermediate request/signed files are removed at the end.
+
+# Settings
+# Change BASE_DIR to the directory that should hold the output. The comment
+# must be separated from the value by a space, otherwise bash treats the
+# following word as a command and BASE_DIR is never set.
+BASE_DIR=$(pwd)
+CERT_OUTPUT_PATH="$BASE_DIR/certificates"
+PASSWORD=test1234
+KEY_STORE="$CERT_OUTPUT_PATH/server.keystore.jks"
+TRUST_STORE="$CERT_OUTPUT_PATH/server.truststore.jks"
+CLIENT_KEY_STORE="$CERT_OUTPUT_PATH/client.keystore.jks"
+CLIENT_TRUST_STORE="$CERT_OUTPUT_PATH/client.truststore.jks"
+KEY_PASSWORD=$PASSWORD
+STORE_PASSWORD=$PASSWORD
+TRUST_KEY_PASSWORD=$PASSWORD
+TRUST_STORE_PASSWORD=$PASSWORD
+CERT_AUTH_FILE="$CERT_OUTPUT_PATH/ca-cert"
+DAYS_VALID=3650
+DNAME="CN=Test, OU=YourDept, O=YourCompany, L=Shanghai, ST=Shanghai, C=CN"
+# openssl separates every subject component with "/", including CN.
+SUBJ="/C=CN/ST=Shanghai/L=Shanghai/O=YourCompany/OU=YourDept/CN=Test"
+
+mkdir -p "$CERT_OUTPUT_PATH"
+
+echo "1. 产生 key 和证书......"
+keytool -keystore "$KEY_STORE" -alias kafka-server -validity "$DAYS_VALID" -genkey -keyalg RSA \
+-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "$DNAME"
+
+keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -validity "$DAYS_VALID" -genkey -keyalg RSA \
+-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -dname "$DNAME"
+
+echo "2. 创建 CA......"
+openssl req -new -x509 -keyout "$CERT_OUTPUT_PATH/ca-key" -out "$CERT_AUTH_FILE" -days "$DAYS_VALID" \
+-passin pass:"$PASSWORD" -passout pass:"$PASSWORD" \
+-subj "$SUBJ"
+
+echo "3. 添加 CA 文件到 broker truststore......"
+# Uses TRUST_KEY_PASSWORD, the variable actually defined in the settings.
+keytool -keystore "$TRUST_STORE" -alias CARoot \
+-importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt
+
+echo "4. 添加 CA 文件到 client truststore......"
+keytool -keystore "$CLIENT_TRUST_STORE" -alias CARoot \
+-importcert -file "$CERT_AUTH_FILE" -storepass "$TRUST_STORE_PASSWORD" -keypass "$TRUST_KEY_PASSWORD" -noprompt
+
+echo "5. 从 keystore 中导出集群证书......"
+keytool -keystore "$KEY_STORE" -alias kafka-server -certreq -file "$CERT_OUTPUT_PATH/server-cert-file" \
+-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
+
+keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -certreq -file "$CERT_OUTPUT_PATH/client-cert-file" \
+-storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
+
+echo "6. 使用 CA 签发证书......"
+openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey "$CERT_OUTPUT_PATH/ca-key" -in "$CERT_OUTPUT_PATH/server-cert-file" \
+-out "$CERT_OUTPUT_PATH/server-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD"
+
+openssl x509 -req -CA "$CERT_AUTH_FILE" -CAkey "$CERT_OUTPUT_PATH/ca-key" -in "$CERT_OUTPUT_PATH/client-cert-file" \
+-out "$CERT_OUTPUT_PATH/client-cert-signed" -days "$DAYS_VALID" -CAcreateserial -passin pass:"$PASSWORD"
+
+echo "7. 导入 CA 文件到 keystore......"
+keytool -keystore "$KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \
+ -keypass "$KEY_PASSWORD" -noprompt
+
+keytool -keystore "$CLIENT_KEY_STORE" -alias CARoot -import -file "$CERT_AUTH_FILE" -storepass "$STORE_PASSWORD" \
+ -keypass "$KEY_PASSWORD" -noprompt
+
+echo "8. 导入已签发证书到 keystore......"
+keytool -keystore "$KEY_STORE" -alias kafka-server -import -file "$CERT_OUTPUT_PATH/server-cert-signed" \
+ -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
+
+keytool -keystore "$CLIENT_KEY_STORE" -alias kafka-client -import -file "$CERT_OUTPUT_PATH/client-cert-signed" \
+ -storepass "$STORE_PASSWORD" -keypass "$KEY_PASSWORD" -noprompt
+
+echo "9. 删除临时文件......"
+rm "$CERT_OUTPUT_PATH/ca-cert.srl"
+rm "$CERT_OUTPUT_PATH/server-cert-signed"
+rm "$CERT_OUTPUT_PATH/client-cert-signed"
+rm "$CERT_OUTPUT_PATH/server-cert-file"
+rm "$CERT_OUTPUT_PATH/client-cert-file"
--- a/shell/kube-logging.sh
+++ b/shell/kube-logging.sh
@ -0,0 +1,171 @@
				@@ -0,0 +1,171 @@
+#!/usr/bin/env bash
+
+# Copyright 2014 The Kubernetes Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Controls verbosity of the script output and logging.
+KUBE_VERBOSE="${KUBE_VERBOSE:-2}"
+
+# Handler for when we exit automatically on an error.
+# Borrowed from https://gist.github.com/ahendrix/7030300
+kube::log::errexit() {
+  # Read PIPESTATUS in the very first statement, before any other command
+  # replaces it.
+  local err="${PIPESTATUS[*]}"
+
+  # If the shell we are in doesn't have errexit set (common in subshells) then
+  # don't dump stacks.
+  set +o | grep -qe "-o errexit" || return
+
+  set +o xtrace
+  # Exit code handed to kube::log::error_exit; defaults to 1 when no argument
+  # is given.
+  local code="${1:-1}"
+  # Print out the stack trace described by $function_stack
+  if [ ${#FUNCNAME[@]} -gt 2 ]
+  then
+    kube::log::error "Call tree:"
+    for ((i=1;i<${#FUNCNAME[@]}-1;i++))
+    do
+      kube::log::error " ${i}: ${BASH_SOURCE[${i}+1]}:${BASH_LINENO[${i}]} ${FUNCNAME[${i}]}(...)"
+    done
+  fi
+  kube::log::error_exit "Error in ${BASH_SOURCE[1]}:${BASH_LINENO[0]}. '${BASH_COMMAND}' exited with status ${err}" "${code}" 1
+}
+
+kube::log::install_errexit() {
+  # trap ERR to provide an error handler whenever a command exits nonzero  this
+  # is a more verbose version of set -o errexit
+  trap 'kube::log::errexit' ERR
+
+  # setting errtrace allows our ERR trap handler to be propagated to functions,
+  # expansions and subshells
+  set -o errtrace
+}
+
+# Print out the stack trace
+#
+# Args:
+#   $1 The number of stack frames to skip when printing.
+kube::log::stack() {
+  local stack_skip=${1:-0}
+  stack_skip=$((stack_skip + 1))
+  if [[ ${#FUNCNAME[@]} -gt ${stack_skip} ]]; then
+    echo "Call stack:" >&2
+    local i
+    for ((i=1 ; i <= ${#FUNCNAME[@]} - stack_skip ; i++))
+    do
+      local frame_no=$((i - 1 + stack_skip))
+      local source_file=${BASH_SOURCE[${frame_no}]}
+      local source_lineno=${BASH_LINENO[$((frame_no - 1))]}
+      local funcname=${FUNCNAME[${frame_no}]}
+      echo "  ${i}: ${source_file}:${source_lineno} ${funcname}(...)" >&2
+    done
+  fi
+}
+
+# Log an error and exit.
+# Args:
+#   $1 Message to log with the error
+#   $2 The error code to return
+#   $3 The number of stack frames to skip when printing.
+kube::log::error_exit() {
+  local message="${1:-}"
+  local code="${2:-1}"
+  local stack_skip="${3:-0}"
+  stack_skip=$((stack_skip + 1))
+
+  if [[ ${KUBE_VERBOSE} -ge 4 ]]; then
+    local source_file=${BASH_SOURCE[${stack_skip}]}
+    local source_line=${BASH_LINENO[$((stack_skip - 1))]}
+    echo "!!! Error in ${source_file}:${source_line}" >&2
+    [[ -z ${1-} ]] || {
+      echo "  ${1}" >&2
+    }
+
+    kube::log::stack ${stack_skip}
+
+    echo "Exiting with status ${code}" >&2
+  fi
+
+  exit "${code}"
+}
+
+# Log an error but keep going.  Don't dump the stack or exit.
+kube::log::error() {
+  # Args: $1 headline printed after "!!! <timestamp>"; every further argument
+  # is printed on its own indented line. All output goes to stderr.
+  # timestamp and message are local so they do not overwrite caller variables.
+  local timestamp message
+  timestamp=$(date +"[%m%d %H:%M:%S]")
+  echo "!!! ${timestamp} ${1-}" >&2
+  shift
+  for message; do
+    echo "    ${message}" >&2
+  done
+}
+
+# Print an usage message to stderr.  The arguments are printed directly.
+kube::log::usage() {
+  # Writes an empty line, then each argument on its own line, then another
+  # empty line, all to stderr.
+  echo >&2
+  local message
+  for message; do
+    echo "${message}" >&2
+  done
+  echo >&2
+}
+
+kube::log::usage_from_stdin() {
+  # Collect stdin line by line and hand the lines to kube::log::usage.
+  local messages=()
+  local line
+  # read returns non-zero on a final line that lacks a trailing newline; the
+  # extra test keeps that line instead of silently dropping it.
+  while read -r line || [[ -n ${line} ]]; do
+    messages+=("${line}")
+  done
+
+  kube::log::usage "${messages[@]}"
+}
+
+# Print out some info that isn't a top level status line
+kube::log::info() {
+  # Prints each argument on its own line, unless the message level V
+  # (default 0) is above KUBE_VERBOSE.
+  local V="${V:-0}"
+  # -lt compares numerically; inside [[ ]] the "<" operator compares strings,
+  # so a verbosity of 10 would sort below a level of 2.
+  if [[ ${KUBE_VERBOSE} -lt ${V} ]]; then
+    return
+  fi
+
+  local message
+  for message; do
+    echo "${message}"
+  done
+}
+
+# Just like kube::log::info, but no \n, so you can make a progress bar
+kube::log::progress() {
+  # Each argument is echoed with -e (backslash escapes interpreted) and -n
+  # (no trailing newline).
+  for message; do
+    echo -e -n "${message}"
+  done
+}
+
+kube::log::info_from_stdin() {
+  # Collect stdin line by line and hand the lines to kube::log::info.
+  local messages=()
+  local line
+  # read returns non-zero on a final line that lacks a trailing newline; the
+  # extra test keeps that line instead of silently dropping it.
+  while read -r line || [[ -n ${line} ]]; do
+    messages+=("${line}")
+  done
+
+  kube::log::info "${messages[@]}"
+}
+
+# Print a status line.  Formatted to show up in a stream of output.
+kube::log::status() {
+  # Args: $1 headline printed after "+++ <timestamp>"; every further argument
+  # is printed on its own indented line. Nothing is printed when the message
+  # level V (default 0) is above KUBE_VERBOSE.
+  local V="${V:-0}"
+  # -lt compares numerically; inside [[ ]] the "<" operator compares strings,
+  # so a verbosity of 10 would sort below a level of 2.
+  if [[ ${KUBE_VERBOSE} -lt ${V} ]]; then
+    return
+  fi
+
+  # Local so the helper does not overwrite caller variables of the same name.
+  local timestamp message
+  timestamp=$(date +"[%m%d %H:%M:%S]")
+  echo "+++ ${timestamp} ${1}"
+  shift
+  for message; do
+    echo "    ${message}"
+  done
+}
--- a/shell/linux采集/caiji.sh
+++ b/shell/linux采集/caiji.sh
@ -0,0 +1,83 @@
				@@ -0,0 +1,83 @@
+#!/bin/bash
+# create log file folder
+test -e /var/log/ecsanalyse || mkdir /var/log/ecsanalyse;
+datetime=$(date +%Y%m%d-%H-%M-%S-%N)
+log_filename=ecs_analyse_${datetime}.log
+log_file=/var/log/ecsanalyse/$log_filename
+
+# script start------------
+echo "##*problem_total_analyse" >> $log_file 2>&1
+echo "###dos-ff" >> $log_file 2>&1
+file /etc/passwd >> $log_file 2>&1
+file /etc/shadow >> $log_file 2>&1
+file /etc/pam.d/* >> $log_file 2>&1
+echo "###limits" >> $log_file 2>&1
+cat /etc/security/limits.conf | grep -Ev "^$|[#;]" >> $log_file 2>&1
+echo "###virtio-net-multiqueue" >> $log_file 2>&1
+for i in $(ip link | grep -E "^[0-9]+: .*:" -o | cut -d ":" -f 2 | grep -v lo); do echo $i >> $log_file 2>&1; ethtool -l $i 2>/dev/null | grep Combined >> $log_file 2>&1; done
+echo "###passwd" >> $log_file 2>&1
+cat /etc/passwd >> $log_file 2>&1
+echo "###cpu-top-5" >> $log_file 2>&1
+top -b -n 1 | grep "%Cpu(s):" >> $log_file 2>&1
+ps -eT -o%cpu,pid,tid,ppid,comm | grep -v CPU | sort -n -r | head -5 >> $log_file 2>&1
+echo "###ssh-perm" >> $log_file 2>&1
+echo "***centos" >> $log_file 2>&1
+ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /var/empty/* /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1
+echo "***ubuntu" >> $log_file 2>&1
+ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1
+echo "***debian" >> $log_file 2>&1
+ls -l /etc/passwd /etc/shadow /etc/group /etc/gshadow /etc/securetty* /etc/security/* /etc/ssh/* >> $log_file 2>&1
+echo "###blkid" >> $log_file 2>&1
+blkid >> $log_file 2>&1
+echo "###osinfo" >> $log_file 2>&1
+if test -f "/etc/os-release"; then
+cat /etc/os-release | egrep "^NAME=|^VERSION=" >> $log_file 2>&1
+else
+echo "no os-release" >> $log_file 2>&1
+echo "no os-release" >> $log_file 2>&1
+fi
+if test -f "/etc/redhat-release" ; then
+echo "redhat-release:" $(cat /etc/redhat-release) >> $log_file 2>&1
+else
+echo "no redhat-release" >> $log_file 2>&1
+fi
+echo "uname: " $(uname -a) >> $log_file 2>&1
+echo "uname short\: " $(uname -r) >> $log_file 2>&1
+echo "###softlink" >> $log_file 2>&1
+ls -l / | grep "\->" >> $log_file 2>&1
+echo "###iptables" >> $log_file 2>&1
+echo "***centos-5" >> $log_file 2>&1
+service iptables status >> $log_file 2>&1
+echo "***centos-6" >> $log_file 2>&1
+service iptables status >> $log_file 2>&1
+echo "***centos-7" >> $log_file 2>&1
+firewall-cmd --state >> $log_file 2>&1
+echo "***ubuntu" >> $log_file 2>&1
+ufw status >> $log_file 2>&1
+echo "***default" >> $log_file 2>&1
+iptables -L >> $log_file 2>&1
+echo "###sysctl" >> $log_file 2>&1
+cat /etc/sysctl.conf | grep nr_hugepages >> $log_file 2>&1
+echo -e "net.ipv4.tcp_tw_recycle=\c" >> $log_file 2>&1 && cat /proc/sys/net/ipv4/tcp_tw_recycle >> $log_file 2>&1
+echo -e "net.ipv4.tcp_timestamps=\c" >> $log_file 2>&1 && cat /proc/sys/net/ipv4/tcp_timestamps >> $log_file 2>&1
+echo -e "fs.nr_open=\c" >> $log_file 2>&1 && cat /proc/sys/fs/nr_open >> $log_file 2>&1
+echo -e "net.ipv4.tcp_sack=\c" >> $log_file 2>&1 && cat /proc/sys/net/ipv4/tcp_sack >> $log_file 2>&1
+echo "###fstab" >> $log_file 2>&1
+cat /etc/fstab | grep -Ev "^$|[#;]" >> $log_file 2>&1
+echo "###dmesg" >> $log_file 2>&1
+cat /proc/uptime >> $log_file 2>&1
+dmesg | grep "invoked oom-killer" | tail -n 1 >> $log_file 2>&1
+echo "###port-usage" >> $log_file 2>&1
+netstat -tapn | grep LISTEN | grep -E 'sshd' >> $log_file 2>&1
+echo "###selinux" >> $log_file 2>&1
+echo "***default" >> $log_file 2>&1
+getenforce >> $log_file 2>&1
+echo "***ubuntu" >> $log_file 2>&1
+service selinux status > /dev/null; echo $? >> $log_file 2>&1
+echo "***debian" >> $log_file 2>&1
+service selinux status > /dev/null; echo $? >> $log_file 2>&1
+echo "###meminfo" >> $log_file 2>&1
+cat /proc/meminfo | grep Hugepagesize >> $log_file 2>&1
+cat /proc/meminfo | grep MemTotal >> $log_file 2>&1
+# script end------------
+
--- a/shell/linux采集/oos_noak.sh
+++ b/shell/linux采集/oos_noak.sh
@ -0,0 +1,354 @@
				@@ -0,0 +1,354 @@
+#!/bin/bash
+# Mounts the block device(s) reported by blkid that do not match "vda" under
+# /mnt, then inspects and edits configuration files below that mount point.
+# Every step appends to a per-run log file in /var/log/ecsanalyse.
+
+# Create the log directory when missing and derive a unique log file name from
+# the current timestamp (down to nanoseconds).
+test -e /var/log/ecsanalyse || mkdir -p /var/log/ecsanalyse;
+datetime=$(date +%Y%m%d-%H-%M-%S-%N)
+log_filename=ecs_analyse_${datetime}.log
+log_file=/var/log/ecsanalyse/$log_filename
+
+# Global settings shared by the functions below.
+# regionId is read from the metadata address; it is not referenced again in
+# the visible part of this script.
+regionId=$(curl -s http://100.100.100.200/latest/meta-data/region-id)
+mount_dir=/mnt
+# Every blkid entry that does not contain "vda"; the rest of the script uses
+# the value as a single device path. NOTE(review): more than one matching
+# partition would yield several lines here - confirm this cannot happen.
+disk_vdb=$(blkid |grep -v vda | awk -F : '{print $1}')
+# Paths of the files that are checked or edited below the mount point.
+fstab=/mnt/etc/fstab
+grub_f=/mnt/boot/grub/grub.cfg
+grub2_f=/mnt/boot/grub2/grub.cfg
+selinux_config_file=/mnt/etc/selinux/config
+sysctl_config_file=/mnt/etc/sysctl.conf
+rc_local_file=/mnt/etc/rc.local
+sshdconfig_file=/mnt/etc/ssh/sshd_config
+passfile=/mnt/etc/passwd
+passbakfile="/mnt/etc/passwd-"
+shadowfile=/mnt/etc/shadow
+shadowbakfile="/mnt/etc/shadow-"
+# Accounts that check_system_username expects to find in the passwd file.
+system_user=(root daemon nobody dbus polkitd sshd messagebus)
+# Paths that check_sys_important_dir expects to exist (/mnt/var is listed twice).
+system_dir=(/mnt/bin /mnt/sbin /mnt/usr/bin /mnt/usr/sbin /mnt/lib /mnt/lib64 /mnt/usr/lib /mnt/usr/lib64 /mnt/etc /mnt/boot /mnt/var /mnt/var)
+# Replace the root password of the system running this script with a random
+# one. NOTE: the new password is written to the log file in plain text, and
+# that log file is later uploaded by upload_log.
+newpw=$(openssl rand -base64 10)
+echo root:${newpw}|chpasswd
+echo "The new instance passwd is ${newpw}" >>$log_file 2>&1
+
+###define upload log function
+# Upload the log file of this run to the OSS bucket with ossutil64.
+# The ossutil64 binary is downloaded into /var/log/ecsanalyse on first use.
+# "your_ak" / "your_sk" are placeholders for the access key pair.
+# Returns non-zero when the working directory or the binary is unavailable,
+# otherwise the exit status of the ossutil64 upload.
+function upload_log(){
+    local work_dir=/var/log/ecsanalyse
+    local ossutil_bin=${work_dir}/ossutil64
+
+    # Stop here if cd fails; otherwise wget would drop the binary into
+    # whatever directory the caller happened to be in.
+    cd "${work_dir}" || { echo "cannot cd to ${work_dir}, skip log upload" >&2; return 1; }
+
+    if [ ! -e "${ossutil_bin}" ]; then
+        # No binary means nothing to run, so give up instead of falling through.
+        wget http://gosspublic.alicdn.com/ossutil/1.6.10/ossutil64 || return 1
+    fi
+    chmod 755 "${ossutil_bin}"
+    "${ossutil_bin}" -i your_ak -k your_sk -e oss-cn-beijing.aliyuncs.com cp -f "$log_file" oss://public-beijing/ooslogs/
+}
+
+###yuanyuan checkdisk and mount disk
+# Abort the whole run when no candidate disk was detected ($disk_vdb empty):
+# log the problem, upload the log, then exit.
+check_vdb_is_exist() {
+    if [ -n "$disk_vdb" ]; then
+        return 0
+    fi
+    echo "Disk can not find in this system ,exit " >>$log_file 2>&1
+    upload_log
+    exit
+}
+
+
+function mount_disk()
+{
+    test -e $mount_dir || mkdir  -p $mount_dir
+    if mountpoint -q $mount_dir;then umount $mount_dir;fi
+    mount $disk_vdb  $mount_dir  >>$log_file 2>&1
+}
+
+# Log the space and inode usage percentages (5th df column, last output line)
+# of the file system mounted on $mount_dir.
+# Sets the globals disk_usage and disk_inode_usage.
+get_disk_usage() {
+    disk_usage=$(df -h $mount_dir | awk '{ pct = $5 } END { print pct }')
+    disk_inode_usage=$(df -i $mount_dir | awk '{ pct = $5 } END { print pct }')
+    echo "disk space usage: $disk_usage" >>$log_file 2>&1
+    echo "disk inode usage: $disk_inode_usage" >>$log_file 2>&1
+}
+
+# Unmount $mount_dir if it is in use, then check/repair the file system on
+# $disk_vdb: fsck for ext2/3/4 when the superblock state is not exactly
+# "clean", xfs_repair for xfs. Any other (or undetected) type is only logged.
+function repair_fs()
+{
+    local fs_type fs_state
+
+    # mountpoint tests the directory itself; a plain grep in /proc/mounts
+    # would also hit unrelated entries such as /mnt/data.
+    if mountpoint -q "$mount_dir"
+    then
+       echo "Check MNT dir is mounted,umount it" >>$log_file 2>&1
+       umount "$mount_dir" >>$log_file 2>&1
+    fi
+
+    # Ask blkid for the TYPE tag of the target device only, so labels or
+    # other devices containing "ext" cannot influence the decision.
+    fs_type=$(blkid -s TYPE -o value "$disk_vdb" 2>>$log_file)
+    case "$fs_type" in
+        ext2|ext3|ext4)
+            # tune2fs prints "clean", "not clean" or "clean with errors".
+            # A substring test on "clean" accepts all three, so compare the
+            # trimmed value exactly.
+            fs_state=$(tune2fs -l "$disk_vdb" 2>>$log_file | awk -F: '/^Filesystem state/ { gsub(/^[ \t]+|[ \t]+$/, "", $2); print $2 }')
+            if [ "$fs_state" = "clean" ]
+            then
+                echo "Extfs is clean ,dont need fsck" >>$log_file 2>&1
+            else
+                echo "This line means that the file system need fsck，its ext" >>$log_file 2>&1
+                fsck  -y "$disk_vdb" >>$log_file 2>&1
+            fi
+            ;;
+        xfs)
+            echo "This line means that the file system is xfs " >>$log_file 2>&1
+            xfs_repair "$disk_vdb" >>$log_file 2>&1
+            ;;
+        *)
+            # Running xfs_repair on something that is not xfs cannot help.
+            echo "Unsupported or undetected file system type [${fs_type}] on ${disk_vdb}, skip repair" >>$log_file 2>&1
+            ;;
+    esac
+}
+
+function  main(){
+check_vdb_is_exist
+repair_fs
+mount_disk && get_disk_usage
+}
+
+###muyuan Check the existence of system important directories
+function check_sys_important_dir(){
+    for important_dir in ${system_dir[@]}
+    do
+        if [ ! -e $important_dir ]
+        then
+        echo "This sys dir or link $important_dir does not exist " >> $log_file 2>&1
+        fi
+    done
+    echo "Check the existence of system important directories...done"  >> $log_file 2>&1
+}
+###install dos2unix
+# Make sure dos2unix is available (installing it with yum when "which" cannot
+# find it), then run it on the passwd and shadow files.
+install_dos2unix() {
+    if ! which dos2unix; then
+        yum -y install dos2unix >> $log_file 2>&1
+        sleep 5s
+    fi
+    # The conversion is the same whether or not an install was needed.
+    dos2unix ${passfile} >> $log_file 2>&1
+    dos2unix ${shadowfile} >> $log_file 2>&1
+}
+###check / 777
+# Reset top-level entries of /mnt whose mode is exactly 777: directories are
+# set to 755 (recursively), everything else to 644.
+# Symlinks and entries whose name contains "tmp" are left alone.
+function check_rootdir_permission(){
+    local entry mode
+    for entry in /mnt/*; do
+        # Also covers the literal "/mnt/*" left behind by an unmatched glob.
+        [ -e "$entry" ] || continue
+        [ -L "$entry" ] && continue
+        case "${entry##*/}" in
+            *tmp*) continue ;;
+        esac
+        # Compare the octal mode exactly. Matching "rwxrwx" in ls output also
+        # caught 770/775 entries, and "fixing" 770 to 755 widens access.
+        mode=$(stat -c '%a' "$entry" 2>/dev/null)
+        [ "$mode" = "777" ] || continue
+        if [ -d "$entry" ]
+            then
+            echo "This dir ${entry} permission is 777,chmod to 755" >> $log_file 2>&1
+            # NOTE(review): -R also rewrites the mode of everything inside the
+            # directory - kept as in the original, confirm this is intended.
+            chmod -R 755 "${entry}" >> $log_file 2>&1
+            else
+            echo "This file ${entry} permission is 777,chmod to 644" >> $log_file 2>&1
+            chmod 644 "${entry}" >> $log_file 2>&1
+        fi
+    done
+    echo "Check /mnt dir permission 777 ...done " >> $log_file 2>&1
+}
+###check ssh 777
+# Reset regular files in /mnt/etc/ssh whose mode is exactly 777:
+# *.pub, *_config and *moduli become 644, every other file becomes 600.
+# Symlinks and directories are skipped.
+function check_ssh_permission(){
+    local sshfile
+    for sshfile in /mnt/etc/ssh/*; do
+        # Symlinks always show as lrwxrwxrwx in ls, and their last ls column
+        # is the link target, so the old ls parsing chmod'ed a wrong path.
+        [ -L "$sshfile" ] && continue
+        # Regular files only: a directory set to 600 loses its search bit.
+        [ -f "$sshfile" ] || continue
+        [ "$(stat -c '%a' "$sshfile" 2>/dev/null)" = "777" ] || continue
+        case "${sshfile##*/}" in
+            *.pub|*_config|*moduli)
+                echo "Change file ${sshfile} permission to 644" >> $log_file 2>&1
+                chmod 644 "${sshfile}" >> $log_file 2>&1
+                ;;
+            *)
+                echo "Change file ${sshfile} permission to 600" >> $log_file 2>&1
+                chmod 600 "${sshfile}" >> $log_file 2>&1
+                ;;
+        esac
+    done
+    echo "Check /mnt/etc/ssh dir permission 777 ...done "  >> $log_file 2>&1
+}
+###change empty_sshd permission
+function check_empty_sshd(){
+    if [ -d /mnt/var/empty/sshd ];then
+    echo "change empty_sshd permission to 711"  >> $log_file 2>&1
+    chmod 711 /mnt/var/empty/sshd;else
+    echo "This system does not have dir /var/empty/sshd"  >> $log_file 2>&1
+    fi
+}
+###check selinux
+# Make sure the SELinux config below /mnt contains "SELINUX=disabled".
+# Existing SELINUX= lines are kept as comments. Running the function again
+# leaves an already-disabled file untouched.
+function disable_selinux(){
+    if [ ! -e "${selinux_config_file}" ]; then
+        echo "this os does not have selinux config file" >>$log_file 2>&1
+        return 0
+    fi
+    # Already disabled: do nothing, so repeated runs do not pile up lines.
+    if grep -q '^SELINUX=disabled[[:space:]]*$' "${selinux_config_file}"; then
+        echo "selinux is already disabled in ${selinux_config_file}" >>$log_file 2>&1
+        return 0
+    fi
+    echo "anon selinux and add a new disable line "  >>$log_file 2>&1
+    # Append at the end of the file instead of after each commented line, so
+    # the setting is written exactly once, even when no SELINUX= line existed.
+    sed -i -e 's/^SELINUX=/#SELINUX=/' -e '$a\SELINUX=disabled' "${selinux_config_file}"
+}
+###check sysctl
+# Neutralise sysctl settings below /mnt: when sysctl.conf has 50 lines or
+# more every line is commented out, otherwise only vm.nr_hugepages and
+# vm.min_free_kbytes are commented out.
+# Reads the global sysctl_line_count (treated as 0 when unset or empty).
+function disable_sysctl(){
+    if [ ! -e "${sysctl_config_file}" ]; then
+        echo "This instance does not have sysctl config file" >>$log_file 2>&1
+        return 0
+    fi
+    if [[ ${sysctl_line_count:-0} -ge 50 ]]; then
+        # The message now states the threshold that is actually tested.
+        echo "sysctl.conf line count ge 50,so anon all line" >>$log_file 2>&1
+        sed -i 's/^/#/' "${sysctl_config_file}"
+    else
+        echo "check and anon nr_hugepages" >>$log_file 2>&1
+        sed -i 's/^vm\.nr_hugepages/#vm.nr_hugepages/' "${sysctl_config_file}"
+        echo "check and anon min_free_kbytes" >>$log_file 2>&1
+        sed -i 's/^vm\.min_free_kbytes/#vm.min_free_kbytes/' "${sysctl_config_file}"
+    fi
+}
+###check rc.local
+function disable_rc_local(){
+    if [ -e ${rc_local_file} ];then
+        echo "anon rc_local line"  >>$log_file 2>&1
+        sed -i 's/^/#/g' ${rc_local_file}
+    else
+        echo "This instance does not have rc_local files" >>$log_file 2>&1
+    fi
+}
+
+###check system release
+function check_os_type(){
+    if [ -e /mnt/etc/os-release ];then
+        os_type_1=$(grep -i "^ID=" /mnt/etc/os-release |awk -F "=" '{print tolower($NF)}'|tr -d "\"")
+        echo "This instance os_type is ${os_type_1}"  >> $log_file 2>&1
+    else
+        os_type_2=$(head -n 1 /mnt/etc/issue.net |awk '{print  tolower($1)}')
+        echo "may be,this instance is ${os_type_2} " >> $log_file 2>&1
+    fi 
+}
+###Check passwd 
+function check_system_username(){
+    if [ -e "${passfile}" ] && [ -s "${passfile}" ] ;then
+        echo "passwd file is not zero,start check username"  >> $log_file 2>&1
+        echo -e "##########\n#please note \n#centos & redhat does not have messagebus user  \n#ubuntu & debian does not have dbus & polkitd user\n##########" >> $log_file 2>&1
+        for c_user in ${system_user[@]}
+        do
+            if ! egrep -qs "^${c_user}" ${passfile} ;then
+                echo "This account ${c_user} not in ${passfile}" >> $log_file 2>&1
+                grep  ^${c_user} ${passbakfile} >> ${passfile}
+                if [ $? -eq 0 ];then
+                echo "${passbakfile} has this account ${c_user},Restore the account to ${passfile}" >> $log_file 2>&1
+                else
+                echo "${passbakfile} does not have this account ${c_user},can not restore from the bakfile" >> $log_file 2>&1
+                fi
+            else
+                echo "This account ${c_user} is ok" >> $log_file 2>&1
+            fi
+        done
+    elif [ -e "${passbakfile}" ] && [ -s "${passbakfile}" ]; then
+        echo "File ${passfile} size is zero or not exists，bcakup to passwd.bak and use ${passbakfile} restore" >> $log_file 2>&1
+        mv ${passfile} ${passfile}.bak
+        cp ${passbakfile} ${passfile}
+    else
+        echo "The ${passbakfile} file not exists or size is zero" >> $log_file 2>&1
+    fi
+}
+###check shadow
+# Comment out the existing root entries in passwd and shadow below /mnt and
+# append fresh ones. The appended shadow entry has an empty password field.
+# NOTE(review): every call appends another pair of root lines - confirm the
+# function is only meant to run once per disk.
+function check_root_pass_shadow(){
+    echo "anon root in passwd & shadow and add new root no passwd " >> $log_file 2>&1
+    # Anchor on "root:" so that other accounts whose name merely starts with
+    # "root" are not commented out as well.
+    sed -i 's/^root:/#root:/' "${passfile}"
+    sed -i 's/^root:/#root:/' "${shadowfile}"
+    echo "root:x:0:0:root:/root:/bin/bash" >> "${passfile}"
+    echo "root::18340:0:99999:7:::" >> "${shadowfile}"
+
+}
+###shibin check uuid fstab
+function annotation_datadisk(){
+    base_l="`cat ${fstab} | sed '/^$/d' |grep -v "#" | grep -v swap | grep -w -v /`"
+    echo "In fun [annotation_datadisk]: " >> $log_file 2>&1 
+    echo "${base_l}" | while read line; do
+	echo "blanking [ $line ]" >>  $log_file 2>&1
+        sed  -i "s#$line#\#$line#g" $fstab
+    done
+}
+
+# Point the root entry of the fstab below /mnt at /dev/vda1.
+# The device, options, dump and pass fields are replaced; the file system
+# type of the existing entry is kept (ext4 is used only when the entry has
+# no type field). Commented lines are not touched.
+function replace_sysdisk_by_diskname(){
+    local tmp_fstab
+    echo "In fun [replace_sysdisk_by_diskname]: " >> $log_file 2>&1
+    if [ ! -f "$fstab" ]; then
+        echo "fstab file $fstab does not exist" >> $log_file 2>&1
+        return 0
+    fi
+    tmp_fstab=$(mktemp) || return 1
+    # Writing a fixed "ext4" here would mislabel an xfs root file system, and
+    # a multi-line match used as a sed pattern makes sed fail. awk handles
+    # each root line on its own.
+    if awk '
+        !/^[[:space:]]*#/ && $2 == "/" {
+            fs = ($3 != "" ? $3 : "ext4")
+            line_r = "/dev/vda1       /       " fs "    defaults    1 1"
+            print "Setting [ " $0 " ] as [ " line_r " ]" > "/dev/stderr"
+            print line_r
+            next
+        }
+        { print }
+    ' "$fstab" > "$tmp_fstab" 2>> $log_file
+    then
+        # cat into the existing file keeps its owner and mode.
+        cat "$tmp_fstab" > "$fstab"
+    fi
+    rm -f "$tmp_fstab"
+}
+# Replace "UUID=<uuid of $disk_vdb>" with /dev/vda1 in the grub configuration
+# below /mnt. grub2/grub.cfg is preferred; grub/grub.cfg is used only when the
+# former does not exist. A timestamped copy is made before editing.
+function reset_grub_by_diskname(){
+    local uuid grub_cfg
+    echo "In fun [reset_grub_by_diskname]: " >> $log_file 2>&1
+    # Ask blkid for the UUID tag explicitly: the second column of its default
+    # output is LABEL=... when the file system carries a label.
+    uuid=$(blkid -s UUID -o value "$disk_vdb" 2>> $log_file)
+    if [ -z "$uuid" ]; then
+        # An empty pattern would make sed fail, so leave grub untouched.
+        echo "Can not get UUID of ${disk_vdb}, skip grub update" >> $log_file 2>&1
+        return 0
+    fi
+    for grub_cfg in "${grub2_f}" "${grub_f}"; do
+        [ -f "$grub_cfg" ] || continue
+        echo "cp ${grub_cfg} ${grub_cfg}_${datetime}" >> $log_file 2>&1
+        cp "${grub_cfg}" "${grub_cfg}_${datetime}"
+        sed -i "s#UUID=${uuid}#/dev/vda1#g" "${grub_cfg}"
+        # Only the first configuration file found is edited.
+        break
+    done
+}
+function backup_fstab(){
+    echo "In fun [backup_fstab]" >> $log_file 2>&1
+    echo  " cp ${fstab} ${fstab}_${datetime}" >> $log_file 2>&1 
+    cp ${fstab} ${fstab}_${datetime}
+}
+###nvshen check sshdcofig
+function check_sshdconfig()
+{
+    echo "check PermitRootLogin and change to yes" >>$log_file 2>&1
+    sed -i 's/^PermitRootLogin.*$/PermitRootLogin yes/' ${sshdconfig_file}
+    echo "check AllowUsers AllowGroups DenyUsers DenyGroups and Comment out" >>$log_file 2>&1
+    sed -i 's/^AllowUsers.*$/#AllowUsers/' ${sshdconfig_file}
+    sed -i 's/^AllowGroups.*$/#AllowGroups/' ${sshdconfig_file}
+    sed -i 's/^DenyUsers.*$/#DenyUsers/' ${sshdconfig_file}
+    sed -i 's/^DenyGroups.*$/#DenyGroups/' ${sshdconfig_file}
+    echo "check LoginGraceTime and cahnge to 100s" >>$log_file 2>&1
+    sed -i 's/^LoginGraceTime.*$/LoginGraceTime 100/' ${sshdconfig_file}
+}
+
+###start check
+echo "mount bad sysdisk to mnt" >> $log_file 2>&1
+main
+###set var sysctl line count
+sysctl_line_count=$(awk 'END{print NR}' /mnt/etc/sysctl.conf)
+echo "Check sysdisk mount ready" >> $log_file 2>&1
+mountpoint /mnt >> $log_file 2>&1
+if [ $? -ne 0 ]
+    then
+    echo "The /mnt dir  does not ready mount" >> $log_file 2>&1
+    else
+    echo "The /mnt dir mount ready ,start check..." >> $log_file 2>&1
+    echo "Start...Check the existence of system important directories" >> $log_file 2>&1
+    check_sys_important_dir >>$log_file 2>&1
+    sleep 3s
+    echo "Check /mnt dir permission 777 and change it " >> $log_file 2>&1
+    check_rootdir_permission >>$log_file 2>&1
+    sleep 3s
+    echo "Check /mnt/etc/ssh dir permission 777 and change it" >> $log_file 2>&1
+    check_ssh_permission >>$log_file 2>&1
+    sleep 3s
+    echo "backup  fstab " >> $log_file 2>&1
+    backup_fstab >>$log_file 2>&1
+    sleep 3s
+    echo "replace fstab sysdisk  " >> $log_file 2>&1
+    replace_sysdisk_by_diskname >>$log_file 2>&1
+    sleep 3s
+    echo "anon datadisk in fstab " >> $log_file 2>&1
+    annotation_datadisk >>$log_file 2>&1
+    sleep 3s
+    echo "replace uuid in grub " >> $log_file 2>&1
+    reset_grub_by_diskname >>$log_file 2>&1
+    sleep 3s
+    echo "check and disable selinux"  >>$log_file 2>&1
+    disable_selinux >>$log_file 2>&1
+    sleep 3s
+    echo "check and disable sysctl var set"  >>$log_file 2>&1
+    disable_sysctl >>$log_file 2>&1
+    sleep 3s
+    echo "anon rc_local all line"  >>$log_file 2>&1
+    disable_rc_local
+    sleep 3s
+    echo "check sshd_config"  >>$log_file 2>&1
+    check_sshdconfig
+    sleep 3s
+    echo "check empty sshd dir and change dir permission to 711" >>$log_file 2>&1
+    check_empty_sshd
+    sleep 3s
+    echo "Check instance os_type" >>$log_file 2>&1
+    check_os_type
+    sleep 3s
+    echo "install dos2unix " >>$log_file 2>&1
+    install_dos2unix
+    sleep 3s
+    echo "Check passwd file" >>$log_file 2>&1
+    check_system_username
+    sleep 3s
+    echo "Check shadow file" >>$log_file 2>&1
+    check_root_pass_shadow
+fi
+# script end------------
+sleep 30s
+
+# upload logs to OSS
+cd /var/log/ecsanalyse;
+test -e ossutil64 || wget http://gosspublic.alicdn.com/ossutil/1.6.10/ossutil64 && chmod 755 /var/log/ecsanalyse/ossutil64;
+/var/log/ecsanalyse/ossutil64 -i your_ak -k your_sk -e oss-cn-beijing.aliyuncs.com cp -f $log_file oss://public-beijing/ooslogs/
+
--- a/shell/template.sh
+++ b/shell/template.sh
@ -14,11 +14,23 @@ set -o nounset          # Disallow expansion of unset variables
				@@ -14,11 +14,23 @@ set -o nounset          # Disallow expansion of unset variables
 set -o pipefail         # Use last non-zero exit code in a pipeline


+######################################################################################################
+# environment configuration
+######################################################################################################
+
+
 TAG="CMD"
 LOG_PATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )/logs"
 LOG_FILE="$LOG_PATH/example_`date +"%Y%m%d"`.log"
 HIDE_LOG=true

+
+
+######################################################################################################
+# function
+######################################################################################################
+
+
 function log() {
    [ ! -d "$LOG_PATH" ] && mkdir -p $LOG_PATH
    if [ $HIDE_LOG ]; then
@ -55,4 +67,9 @@ function main() {
				@@ -55,4 +67,9 @@ function main() {

 }

+######################################################################################################
+# main 
+######################################################################################################
+
+
 main "${@}"
--- a/shell/utils.sh
+++ b/shell/utils.sh
@ -0,0 +1,102 @@
				@@ -0,0 +1,102 @@
+
+
+# Download a URL to a local file, trying up to five times.
+# arguments: url, destination file
+# Any existing destination file is removed first.
+# returns 0 as soon as one curl invocation succeeds, 1 after five failures.
+util::download_file() {
+  local -r url=$1
+  local -r destination_file=$2
+  local -r max_attempts=5
+  local attempt
+
+  # "2&>" is parsed as the extra operand "2" followed by "&>", so rm also
+  # tried to delete a file literally named "2". -f makes a missing
+  # destination a non-error, and "--" protects names starting with "-".
+  rm -f -- "${destination_file}" 2> /dev/null || true
+
+  for attempt in $(seq "${max_attempts}")
+  do
+    if curl -fsSL --retry 3 --keepalive-time 2 "${url}" -o "${destination_file}"; then
+      echo "Downloading ${url} succeed"
+      return 0
+    fi
+    echo "Downloading ${url} failed. $((max_attempts-attempt)) retries left."
+    sleep 1
+  done
+  return 1
+}
+
+
+# Example:  util::wait_for_success 120 5 "kubectl get nodes|grep localhost"
+# arguments: wait time, sleep time, shell command
+# returns 0 if the shell command get output, 1 otherwise.
+util::wait_for_success(){
+  local wait_time="$1"
+  local sleep_time="$2"
+  local cmd="$3"
+  while [ "$wait_time" -gt 0 ]; do
+    if eval "$cmd"; then
+      return 0
+    else
+      sleep "$sleep_time"
+      wait_time=$((wait_time-sleep_time))
+    fi
+  done
+  return 1
+}
+
+util::host_os() {
+  local host_os
+  case "$(uname -s)" in
+    Darwin)
+      host_os=darwin
+      ;;
+    Linux)
+      host_os=linux
+      ;;
+    *)
+      echo "Unsupported host OS.  Must be Linux or Mac OS X."
+      exit 1
+      ;;
+  esac
+  echo "${host_os}"
+}
+
+util::host_arch() {
+  local host_arch
+  case "$(uname -m)" in
+    x86_64*)
+      host_arch=amd64
+      ;;
+    i?86_64*)
+      host_arch=amd64
+      ;;
+    amd64*)
+      host_arch=amd64
+      ;;
+    aarch64*)
+      host_arch=arm64
+      ;;
+    arm64*)
+      host_arch=arm64
+      ;;
+    arm*)
+      host_arch=arm
+      ;;
+    i?86*)
+      host_arch=x86
+      ;;
+    s390x*)
+      host_arch=s390x
+      ;;
+    ppc64le*)
+      host_arch=ppc64le
+      ;;
+    *)
+      echo "Unsupported host arch. Must be x86_64, 386, arm, arm64, s390x or ppc64le."
+      exit 1
+      ;;
+  esac
+  echo "${host_arch}"
+}
+
+util::md5() {
+  if which md5 >/dev/null 2>&1; then
+    md5 -q "$1"
+  else
+    md5sum "$1" | awk '{ print $1 }'
+  fi
+}