From 4cd515c14fa086800a92ab23c9844819d0b002f4 Mon Sep 17 00:00:00 2001 From: Ralph Bean Date: Wed, 30 Sep 2015 19:22:43 +0000 Subject: [PATCH] Some nagios monitoring for autocloud. --- roles/nagios_client/tasks/main.yml | 1 + .../templates/check_autocloud_proc.cfg.j2 | 1 + .../files/nagios/services/autocloud.cfg | 13 +++++++ .../files/nagios/services/fedmsg.cfg | 34 +++++++++++++++++++ .../files/nagios/services/websites.cfg | 8 +++++ roles/nagios_server/files/nrpe.cfg | 5 +++ 6 files changed, 62 insertions(+) create mode 100644 roles/nagios_client/templates/check_autocloud_proc.cfg.j2 create mode 100644 roles/nagios_server/files/nagios/services/autocloud.cfg diff --git a/roles/nagios_client/tasks/main.yml b/roles/nagios_client/tasks/main.yml index 915eb79608..18d875aa7d 100644 --- a/roles/nagios_client/tasks/main.yml +++ b/roles/nagios_client/tasks/main.yml @@ -92,6 +92,7 @@ - check_fedmsg_gateway_proc.cfg - check_fedmsg_masher_proc.cfg - check_redis_proc.cfg + - check_autocloud_proc.cfg - check_fcomm_cache_worker_proc.cfg - check_fcomm_queue.cfg - check_fedmsg_consumers.cfg diff --git a/roles/nagios_client/templates/check_autocloud_proc.cfg.j2 b/roles/nagios_client/templates/check_autocloud_proc.cfg.j2 new file mode 100644 index 0000000000..7701756106 --- /dev/null +++ b/roles/nagios_client/templates/check_autocloud_proc.cfg.j2 @@ -0,0 +1 @@ +command[check_autocloud_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'autocloud' -u root diff --git a/roles/nagios_server/files/nagios/services/autocloud.cfg b/roles/nagios_server/files/nagios/services/autocloud.cfg new file mode 100644 index 0000000000..80da5823bb --- /dev/null +++ b/roles/nagios_server/files/nagios/services/autocloud.cfg @@ -0,0 +1,13 @@ +define service { + host_name autocloud-backend01,autocloud-backend02 + service_description Check for autocloud proc + check_command check_by_nrpe!check_autocloud_proc + use defaulttemplate +} + +define service { + host_name autocloud-backend01,autocloud-backend02 + 
service_description Check for redis proc + check_command check_by_nrpe!check_redis_proc + use defaulttemplate +} diff --git a/roles/nagios_server/files/nagios/services/fedmsg.cfg b/roles/nagios_server/files/nagios/services/fedmsg.cfg index f223b6c427..0abb683dca 100644 --- a/roles/nagios_server/files/nagios/services/fedmsg.cfg +++ b/roles/nagios_server/files/nagios/services/fedmsg.cfg @@ -100,6 +100,12 @@ define service { check_command check_by_nrpe!check_fedmsg_masher_proc use defaulttemplate } +define service { + host_name autocloud-backend01,autocloud-backend02 + service_description Check for fedmsg-hub proc + check_command check_by_nrpe!check_fedmsg_hub_proc + use defaulttemplate +} # Odd one, check for the supybot fedmsg plugin @@ -286,6 +292,12 @@ define service { check_command check_by_nrpe!check_datanommer_fmn use defaulttemplate } +define service { + host_name busgateway01 + service_description Check datanommer for recent autocloud messages + check_command check_by_nrpe!check_datanommer_autocloud + use defaulttemplate +} # BEGIN, check consumers and producers @@ -394,6 +406,14 @@ define service { use defaulttemplate } +define service { + host_name autocloud-backend01,autocloud-backend02 + service_description Check fedmsg consumers and producers hub + check_command check_by_nrpe!check_fedmsg_cp_autocloud_backend_hub + use defaulttemplate +} + + # BEGIN exceptions counter define service { host_name busgateway01 @@ -500,6 +520,13 @@ define service { use defaulttemplate } +define service { + host_name autocloud-backend01,autocloud-backend02 + service_description Check fedmsg-hub consumers exceptions + check_command check_by_nrpe!check_fedmsg_cexceptions_autocloud_backend_hub + use defaulttemplate +} + # BEGIN backlog checking define service { @@ -607,6 +634,13 @@ define service { use defaulttemplate } +define service { + host_name autocloud-backend01,autocloud-backend02 + service_description Check fedmsg-hub consumers backlog + check_command 
check_by_nrpe!check_fedmsg_cbacklog_autocloud_backend_hub + use defaulttemplate +} + # A few producer checks go below here define service { diff --git a/roles/nagios_server/files/nagios/services/websites.cfg b/roles/nagios_server/files/nagios/services/websites.cfg index 3248f38590..f40e079b2f 100644 --- a/roles/nagios_server/files/nagios/services/websites.cfg +++ b/roles/nagios_server/files/nagios/services/websites.cfg @@ -302,3 +302,11 @@ define service { max_check_attempts 8 use internalwebsitetemplate } + +define service { + host_name autocloud-web01,autocloud-web02 + service_description autocloud-internal + check_command check_website!localhost!/autocloud/jobs/!Output + max_check_attempts 8 + use internalwebsitetemplate +} diff --git a/roles/nagios_server/files/nrpe.cfg b/roles/nagios_server/files/nrpe.cfg index 0b254c1d22..b5a6b468ef 100644 --- a/roles/nagios_server/files/nrpe.cfg +++ b/roles/nagios_server/files/nrpe.cfg @@ -237,6 +237,7 @@ command[check_haproxy_conns]=/usr/lib64/nagios/plugins/check_haproxy_conns.py command[check_fcomm_cache_worker_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'fcomm-cache-wor' -u apache command[check_fcomm_queue]=/usr/lib64/nagios/plugins/check_fcomm_queue command[check_redis_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'redis-server' -u redis +command[check_autocloud_proc]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -C 'autocloud' -u root command[check_openvpn_link]=/usr/lib64/nagios/plugins/check_ping -H 192.168.1.58 -w 375.0,20% -c 500,60% command[check_memcache]=/usr/lib64/nagios/plugins/check_procs -c 1:1 -a '/usr/bin/memcached' -u memcached command[check_memcache_connect]=/usr/lib64/nagios/plugins/check_memcache_connect @@ -275,6 +276,7 @@ command[check_datanommer_nuancier]=/usr/lib64/nagios/plugins/check_datanommer_ti command[check_datanommer_fedocal]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py fedocal 7884000 23652000 
command[check_datanommer_ansible]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py ansible 432000 604800 command[check_datanommer_anitya]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py anitya 604800 1814400 +command[check_datanommer_autocloud]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py autocloud 604800 1814400 # These are not actually finished and deployed yet command[check_datanommer_mailman]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py mailman 14400 86400 @@ -295,6 +297,7 @@ command[check_fedmsg_cp_fedimg_backend]=/usr/lib64/nagios/plugins/check_fedmsg_p command[check_fedmsg_cp_hotness_backend]=/usr/lib64/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub BugzillaTicketFiler MonitoringProducer command[check_fedmsg_cp_bodhi_backend01_hub]=/usr/lib64/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub Masher MonitoringProducer command[check_fedmsg_cp_bodhi_backend02_hub]=/usr/lib64/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub UpdatesHandler MonitoringProducer +command[check_fedmsg_cp_autocloud_backend_hub]=/usr/lib64/nagios/plugins/check_fedmsg_producers_consumers.py fedmsg-hub AutoCloudConsumer MonitoringProducer command[check_fedmsg_cexceptions_busgateway_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Nommer 1 10 command[check_fedmsg_cexceptions_busgateway_relay]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-relay RelayConsumer 1 10 @@ -310,6 +313,7 @@ command[check_fedmsg_cexceptions_fedimg_backend]=/usr/lib64/nagios/plugins/check command[check_fedmsg_cexceptions_hotness_backend]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub BugzillaTicketFiler 1 10 command[check_fedmsg_cexceptions_bodhi_backend01_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub Masher 1 10 command[check_fedmsg_cexceptions_bodhi_backend02_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub 
UpdatesHandler 1 10 +command[check_fedmsg_cexceptions_autocloud_backend_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_exceptions.py fedmsg-hub AutoCloudConsumer 1 10 command[check_fedmsg_cbacklog_busgateway_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Nommer 500 1000 command[check_fedmsg_cbacklog_busgateway_relay]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-relay RelayConsumer 10 50 @@ -325,6 +329,7 @@ command[check_fedmsg_cbacklog_fedimg_backend]=/usr/lib64/nagios/plugins/check_fe command[check_fedmsg_cbacklog_hotness_backend]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub BugzillaTicketFiler 100 500 command[check_fedmsg_cbacklog_bodhi_backend01_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub Masher 500 1000 command[check_fedmsg_cbacklog_bodhi_backend02_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub UpdatesHandler 500 1000 +command[check_fedmsg_cbacklog_autocloud_backend_hub]=/usr/lib64/nagios/plugins/check_fedmsg_consumer_backlog.py fedmsg-hub AutoCloudConsumer 500 1000 command[check_fedmsg_fmn_digest_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub DigestProducer 90 600 command[check_fedmsg_fmn_confirm_last_ran]={{libdir}}/nagios/plugins/check_fedmsg_producer_last_ran.py fedmsg-hub ConfirmationProducer 30 300