관리해야 할 데몬들이 많아진다! Monit 써보기

2년 전에 Monit이라는 존재를 들어 보기만 했을 뿐 직접 사용해 보지는 않았는데,
배포에 꼭 필요한 에이전트가 부팅 시 START에 실패하는 사례가 있어서 해결책을 찾아보다가 Monit을 사용해보기로 하였다.
마침 php-fpm의 child 프로세스 개수가 일정 수치를 넘어가면 알람을 받고 싶은 니즈도 있어서, Monit으로 두 가지 고민을 간단하게 해결하려 한다.

codedeploy-agent 상태를 지켜보다가 프로세스가 다운되면 다시 시작해준다.

[root@ip-*-*-*-* monit.d]# cat codedeploy-agent
check process codedeploy-agent with pidfile /opt/codedeploy-agent/state/.pid/codedeploy-agent.pid
start program = "/usr/sbin/service codedeploy-agent start"
stop program = "/usr/sbin/service codedeploy-agent stop"
if 5 restarts within 5 cycles then timeout
[root@ip-*-*-*-* monit.d]# monit status codedeploy-agent
Monit 5.25.1 uptime: 3h 34m

Process 'codedeploy-agent'
  status                       OK
  monitoring status            Monitored
  monitoring mode              active
  on reboot                    start
  pid                          29848
  parent pid                   1
  uid                          0
  effective uid                0
  gid                          0
  uptime                       3h 31m
  threads                      2
  children                     1
  cpu                          0.0%
  cpu total                    0.0%
  memory                       0.1% [19.6 MB]
  memory total                 0.2% [61.1 MB]
  security attribute           (null)
  disk write                   0 B/s [4 kB total]
  data collected               Thu, 12 Jul 2018 20:37:45

php-fpm의 child 프로세스 개수가 62개를 넘으면 슬랙으로 알림을 보낸다.

[root@ip-*-*-*-* monit.d]# cat php-fpm
check process php-fpm with pidfile /run/php-fpm/php-fpm.pid
group php
start program = "/usr/sbin/service php-fpm start"
stop program = "/usr/sbin/service php-fpm stop"
if children > 62 for 1 cycles then exec "/etc/monit/slack_notifications.sh"
if 10 restarts within 10 cycles then timeout
[root@ip-*-*-*-* monit.d]# monit status php-fpm
Monit 5.25.1 uptime: 3h 38m

Process 'php-fpm'
  status                       Resource limit matched
  monitoring status            Monitored
  monitoring mode              active
  on reboot                    start
  pid                          7019
  parent pid                   1
  uid                          0
  effective uid                0
  gid                          0
  uptime                       2h 41m
  threads                      1
  children                     64
  cpu                          0.0%
  cpu total                    0.0%
  memory                       0.1% [30.2 MB]
  memory total                 1.6% [465.2 MB]
  security attribute           (null)
  data collected               Thu, 12 Jul 2018 20:42:15
comments powered by Disqus