monitor.sh 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136
  1. #!/bin/bash
  # Usage: bash monitor.sh monitor.cfg > /dev/null 2>&1
  # System monitoring:
  #   network, memory, CPU, disk
  # When a configured threshold is exceeded, an alert e-mail is sent to the ${mail_to} account.
  #######################################
  # Print the value of one key from the key=value config file.
  # Lines starting with ';' and empty lines are skipped. The key must match
  # the text before the first '=' exactly; everything after that first '='
  # is printed, so a value may itself contain '='.
  # Globals:   monitor_cfg (read) - path of the config file
  # Arguments: $1 - key to look up
  # Outputs:   the value of each matching line on stdout, one per line
  #######################################
  readcfg() {
    # The key is passed with -v instead of being spliced into the awk
    # program text, so unusual characters in it cannot alter the program.
    awk -v key="$1" '
      /^;/ || /^$/ { next }
      {
        sub(/\r$/, "")                 # tolerate CRLF line endings
        eq = index($0, "=")
        if (eq > 0 && substr($0, 1, eq - 1) == key) {
          print substr($0, eq + 1)
        }
      }
    ' "${monitor_cfg}"
  }
  #######################################
  # Mail the accumulated error log to the configured recipients.
  # The message body is assembled completely before the mail command is
  # started in the background, so later changes to the log or temp files
  # cannot alter or truncate a message that is already queued.
  # Globals:   hostname, file_error_log, mail_to (all read)
  # Arguments: none
  # Returns:   1 when no recipient is configured, 0 otherwise
  #######################################
  send_mail() {
    local body
    local -a recipients

    # Split on whitespace without glob expansion; several addresses may be
    # listed in mail_to.
    read -r -a recipients <<< "${mail_to}"
    if [[ ${#recipients[@]} -eq 0 ]]; then
      return 1
    fi

    body=$(
      printf 'Hi,\n\t%s occurred a problem!\n\n\n\n' "${hostname}"
      [[ -r "${file_error_log}" ]] && cat < "${file_error_log}"
      printf '\n\n--%s' "${hostname}"
    )

    # Sending runs in the background so a slow mailer does not stall the
    # monitoring run; its output is intentionally discarded.
    printf '%s\n' "${body}" \
      | mail -s "!Important:The ${hostname} monitor warn" "${recipients[@]}" \
        > /dev/null 2>&1 &
  }
  # Succeed when $1 >= $2 numerically; fail when either value is empty.
  _monitor_ge() {
    [[ -n "$1" && -n "$2" ]] || return 1
    awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) >= (b + 0)) }'
  }

  # Read `free` output on stdin and print used/total (columns 3 and 2) of the
  # row labelled $1 as a percentage; prints 0.00 when the row is missing or
  # its total is zero.
  _monitor_usage_rate() {
    awk -v label="$1" '
      $1 == label {
        if ($2 + 0 > 0) rate = ($3 / $2) * 100
        exit
      }
      END { printf "%.2f\n", rate + 0 }
    '
  }

  # Append message $1 to the error log and the daily log, then send the mail.
  _monitor_alert() {
    printf '%s\n' "$1" >> "${file_error_log}"
    printf '%s\n' "$1" >> "${file_log}"
    send_mail
  }

  #######################################
  # Take one snapshot of the system (logged-in users, load, memory, swap,
  # CPU, disk I/O, disk space), append it to the daily log, and raise an
  # alert for every reading that reaches its configured threshold.
  # Globals:   hostname, curr_time, file_log, file_error_log (read)
  #            pole_cpu, pole_memery, pole_load, pole_ide, pole_hdio (read)
  # Arguments: none
  # Outputs:   appends to ${file_log}; alerts also go to ${file_error_log}
  #            and trigger send_mail
  #######################################
  monitor() {
    local count_users avg_load free_out rate_memery rate_swap rate_cpu
    local io_stats io_state df_out disk disk_usages usage dev util

    # LC_ALL=C keeps row labels and the decimal separator predictable for
    # the parsing below.
    count_users=$(who | awk 'END { print NR }')
    # Third load figure, i.e. the 15-minute average.
    avg_load=$(LC_ALL=C uptime \
      | awk -F'load average: *' '{ split($2, loads, /, */); print loads[3] }')

    free_out=$(LC_ALL=C free)
    rate_memery=$(_monitor_usage_rate 'Mem:' <<< "${free_out}")
    # Swap is read from its own row rather than from the memory row.
    rate_swap=$(_monitor_usage_rate 'Swap:' <<< "${free_out}")

    # Use the last avg-cpu report (the second sample covers the most recent
    # interval); %idle is its last column.
    rate_cpu=$(LC_ALL=C iostat -c 1 2 | awk '
      /^avg-cpu:/ { if ((getline) > 0) idle = $NF }
      END { if (idle != "") printf "%.2f\n", 100 - idle }
    ')

    # One "device %util" pair per line; %util is the last column of the
    # extended report. Collected once and reused for the threshold check.
    io_stats=$(LC_ALL=C iostat -x 1 1 | awk '$1 ~ /^sd/ { print $1, $NF }')
    io_state=$(awk 'NF { printf "%s%s %s%%", sep, $1, $2; sep = " " }' \
      <<< "${io_stats}")

    # -P keeps every filesystem on a single line even with long device names.
    df_out=$(LC_ALL=C df -P)
    disk=$(awk 'NR > 1 {
        gsub(/\/dev\//, "", $1)
        printf "%s%s:%s", sep, $1, $5
        sep = " "
      }' <<< "${df_out}")
    disk_usages=$(awk 'NR > 1 { sub(/%$/, "", $5); print $5 }' <<< "${df_out}")

    # Save the snapshot.
    {
      printf '%s\n' '>>>>----------------------------------------------------------------------------------------'
      printf '%s %s\n' "${hostname}" "${curr_time}"
      printf 'Login User: [%s user login]\n' "${count_users}"
      printf 'Load average: %s\n' "${avg_load}"
      printf 'Memory usage: %s%%\n' "${rate_memery}"
      printf 'Swap usage: %s%%\n' "${rate_swap}"
      printf 'CPU usage: %s%%\n' "${rate_cpu}"
      printf 'Hard-disk IO: %s\n' "${io_state}"
      printf 'Hard-disk space: %s\n' "${disk}"
      printf '%s\n' '----------------------------------------------------------------------------------------<<<<'
    } >> "${file_log}"

    # Check CPU
    if _monitor_ge "${rate_cpu}" "${pole_cpu}"; then
      _monitor_alert "Current CPU use rate:${rate_cpu}%, more than ${pole_cpu}% -- ${curr_time}"
    fi

    # Check memory
    if _monitor_ge "${rate_memery}" "${pole_memery}"; then
      _monitor_alert "Current memery use rate:${rate_memery}%, more than ${pole_memery}% -- ${curr_time}"
    fi

    # Check load average
    if _monitor_ge "${avg_load}" "${pole_load}"; then
      _monitor_alert "Load average every 15 minutes is ${avg_load}, more than ${pole_load} -- ${curr_time}"
    fi

    # Check disk space: alert once, for the first filesystem over the limit,
    # and report the measured usage. The loops read from fd 3 so nothing
    # called inside them can consume the list.
    while read -r usage <&3; do
      if _monitor_ge "${usage}" "${pole_ide}"; then
        _monitor_alert "Current hard disk use rate:${usage}%, more than ${pole_ide}% -- ${curr_time}"
        break
      fi
    done 3<<< "${disk_usages}"

    # Check disk I/O: alert once, for the first device over the limit.
    while read -r dev util <&3; do
      if _monitor_ge "${util}" "${pole_hdio}"; then
        _monitor_alert "Current hard disk I/O is ${util}%, more than ${pole_hdio}% -- ${curr_time}"
        break
      fi
    done 3<<< "${io_stats}"
  }
  #######################################
  # Load the configuration, prepare the log directory and file names, run
  # one monitoring pass, and remove the scratch file afterwards.
  # Globals:   monitor_cfg (read) - path of the config file
  #            hostname, curr_date, curr_time, path_iostat, pole_cpu,
  #            pole_memery, pole_load, pole_ide, pole_hdio,
  #            monitor_network_interface, log_path, mail_to, file_tmp,
  #            file_log, file_error_log (written)
  # Arguments: none
  # Returns:   exits 1 when the config file is unreadable, iostat is
  #            missing, or log_path / mail_to is not configured
  #######################################
  main() {
    hostname=$(hostname)
    curr_date=$(date +%Y%m%d)
    curr_time=$(date '+%Y-%m-%d %H:%M:%S')

    # Without a readable config file every lookup below would come back
    # empty (or block waiting on stdin), so stop early.
    if [[ -z "${monitor_cfg}" || ! -r "${monitor_cfg}" ]]; then
      printf 'Cannot read the config file: %s\n' "${monitor_cfg}" >&2
      exit 1
    fi

    # Check the system environment.
    path_iostat=$(command -v iostat)
    if [[ -z "${path_iostat}" ]]; then
      echo 'sysstat does not be installed, you should install it firstly!' >&2
      exit 1
    fi

    pole_cpu=$(readcfg pole_cpu)
    pole_memery=$(readcfg pole_memery)
    pole_load=$(readcfg pole_load)
    pole_ide=$(readcfg pole_ide)
    pole_hdio=$(readcfg pole_hdio)
    # NOTE(review): read from the config but not used by any code visible
    # in this file.
    monitor_network_interface=$(readcfg monitor_network_interface)

    log_path=$(readcfg log_path)
    # An empty log_path would place the log and scratch files in "/".
    if [[ -z "${log_path}" ]]; then
      echo 'You should set the log path firstly!' >&2
      exit 1
    fi
    if [[ ! -d "${log_path}" ]]; then
      mkdir -p -- "${log_path}" || exit 1
    fi

    mail_to=$(readcfg mail_to)
    if [[ -z "${mail_to}" ]]; then
      echo 'You should set the mail recipient firstly!' >&2
      exit 1
    fi

    file_tmp="${log_path}/.tmp"
    file_log="${log_path}/${curr_date}.log"
    file_error_log="${log_path}/error_${curr_date}.log"

    # Run monitor
    monitor
    rm -f -- "${file_tmp}"
  }
  # Script entry point. The first argument is the path of the config file;
  # without it the config lookups would have no file to read, so print a
  # usage line and stop instead.
  if [[ $# -lt 1 || -z "$1" ]]; then
    printf 'Usage: %s <monitor.cfg>\n' "${0##*/}" >&2
    exit 2
  fi
  monitor_cfg="$1"
  main