From 93ed0457e0eb10dd02e967fbe0898e2a741b2d38 Mon Sep 17 00:00:00 2001 From: Greg Sutcliffe Date: Fri, 9 Jan 2026 16:27:45 +0000 Subject: [PATCH] Nagios: remove first batch of services This removes the known-good things we've had in Zabbix for a while - RAID, disk space, processes, and mail queue. It also removes swap, which we've decided we don't need. Also includes some FS overrides on the Zabbix side so the relevant NFS mounts get monitored on the OCI registry and pkgs hosts, as Nagios did. Signed-off-by: Greg Sutcliffe --- inventory/group_vars/oci_registry | 2 + inventory/group_vars/oci_registry_stg | 2 + inventory/group_vars/pkgs | 2 + inventory/group_vars/pkgs_stg | 2 + .../files/nagios/services/copr.cfg | 28 -------- .../files/nagios/services/procs.cfg | 39 ---------- .../files/nagios/services/raid.cfg | 16 ----- .../nagios/services/rdu3_internal/disk.cfg | 71 ------------------- .../files/nagios/services/swap.cfg | 14 ---- .../nagios/services/mail_queue.cfg.j2 | 8 --- 10 files changed, 8 insertions(+), 176 deletions(-) diff --git a/inventory/group_vars/oci_registry b/inventory/group_vars/oci_registry index 78f30a0008..c074627908 100644 --- a/inventory/group_vars/oci_registry +++ b/inventory/group_vars/oci_registry @@ -8,3 +8,5 @@ ipa_host_group_desc: OCI Registry service nfs_mount_opts: "rw,hard,bg,intr,noatime,nodev,nosuid,sec=sys,nfsvers=3" primary_auth_source: ipa tcp_ports: [5000] +zabbix_macros: + 'VFS.FS.FSTYPE.MATCHES': '^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs|nfs)$' diff --git a/inventory/group_vars/oci_registry_stg b/inventory/group_vars/oci_registry_stg index c572c30b13..96de82b739 100644 --- a/inventory/group_vars/oci_registry_stg +++ b/inventory/group_vars/oci_registry_stg @@ -7,3 +7,5 @@ ipa_host_group: oci-registry ipa_host_group_desc: OCI Registry service nfs_mount_opts: "rw,hard,bg,intr,noatime,nodev,nosuid,sec=sys,nfsvers=3" tcp_ports: [5000] +zabbix_macros: + 'VFS.FS.FSTYPE.MATCHES': 
'^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs|nfs)$' diff --git a/inventory/group_vars/pkgs b/inventory/group_vars/pkgs index b6d5a85c2d..f8fe5efdf9 100644 --- a/inventory/group_vars/pkgs +++ b/inventory/group_vars/pkgs @@ -46,3 +46,5 @@ tcp_ports: [80, 443] # There vars are used to configure mod_wsgi wsgi_procs: 20 wsgi_threads: 5 +zabbix_macros: + 'VFS.FS.FSTYPE.MATCHES': '^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs|nfs)$' diff --git a/inventory/group_vars/pkgs_stg b/inventory/group_vars/pkgs_stg index cb6d74b002..ca39f590a2 100644 --- a/inventory/group_vars/pkgs_stg +++ b/inventory/group_vars/pkgs_stg @@ -41,3 +41,5 @@ tcp_ports: [80, 443, 8444, 8443, 8445] # There vars are used to configure mod_wsgi wsgi_procs: 4 wsgi_threads: 4 +zabbix_macros: + 'VFS.FS.FSTYPE.MATCHES': '^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs|nfs)$' diff --git a/roles/nagios_server/files/nagios/services/copr.cfg b/roles/nagios_server/files/nagios/services/copr.cfg index 32b0e7f96c..89e79cdcb2 100644 --- a/roles/nagios_server/files/nagios/services/copr.cfg +++ b/roles/nagios_server/files/nagios/services/copr.cfg @@ -1,31 +1,3 @@ -define service { - hostgroup_name copr_back_aws, copr_back_dev_aws - service_description Disk space on Copr Backend used for build results - check_command check_by_nrpe!check_disk_/var/lib/copr/public_html - use disktemplate -} - -define service { - hostgroup_name copr_dist_git_aws, copr_dist_git_dev_aws - service_description Disk space on Copr DistGit used for imported packages - check_command check_by_nrpe!check_disk_/var/lib/dist-git - use disktemplate -} - -define service { - hostgroup_name copr_front_aws, copr_front_dev_aws - service_description Disk space on Copr Frontend used for the database - check_command check_by_nrpe!check_disk_/srv/copr-fe - use disktemplate -} - -define service { - hostgroup_name copr_keygen_aws, 
copr_keygen_dev_aws - service_description Disk space on Copr Keygen used for GPG keys - check_command check_by_nrpe!check_disk_/var/lib/copr-keygen - use disktemplate -} - define service { hostgroup_name copr_back_aws, copr_back_dev_aws service_description The copr-ping package builds diff --git a/roles/nagios_server/files/nagios/services/procs.cfg b/roles/nagios_server/files/nagios/services/procs.cfg index 0dd8bc5d17..c58c106347 100644 --- a/roles/nagios_server/files/nagios/services/procs.cfg +++ b/roles/nagios_server/files/nagios/services/procs.cfg @@ -1,25 +1,3 @@ -define service { - hostgroup virtservers - service_description Total Processes - check_command check_by_nrpe!check_total_procs - use defaulttemplate -} - -define service { - hostgroup virtservers - service_description Zombie Processes - check_command check_by_nrpe!check_zombie_procs - use defaulttemplate -} - - -define service { - hostgroup virtservers - service_description Cron Daemon - check_command check_by_nrpe!check_cron - use defaulttemplate -} - define service { hostgroup proxies service_description Varnish Process @@ -27,15 +5,6 @@ define service { use defaulttemplate } -# TODO: uncomment when retrace is moved -#define service { -# hostgroup retrace -# service_description Total Processes -# check_command check_by_nrpe!check_total_procs -# use retracetemplate -#} - - define service { hostgroup sign_bridge service_description Sigul bridge Process @@ -43,17 +12,9 @@ define service { use defaulttemplate } -define service { - hostgroup all, !mincheckgrp - service_description Rsyslogd Process - check_command check_by_nrpe!check_rsyslogd_proc - use defaulttemplate -} - define service { hostgroup pagure service_description Systemd Units check_command check_by_nrpe!check_systemd_units use defaulttemplate } - diff --git a/roles/nagios_server/files/nagios/services/raid.cfg b/roles/nagios_server/files/nagios/services/raid.cfg index b77e76cde8..e69de29bb2 100644 --- 
a/roles/nagios_server/files/nagios/services/raid.cfg +++ b/roles/nagios_server/files/nagios/services/raid.cfg @@ -1,16 +0,0 @@ -# Special-case this so it can use retracetemplate. -# TODO: uncomment when retrace is moved -#define service { -# hostgroup_name retrace -# service_description Check_Raid -# check_command check_by_nrpe!check_raid -# use retracetemplate -#} - -# Everything else uses this group. -define service { - hostgroup_name CheckRaid - service_description Check_Raid - check_command check_by_nrpe!check_raid - use lighttemplate -} diff --git a/roles/nagios_server/files/nagios/services/rdu3_internal/disk.cfg b/roles/nagios_server/files/nagios/services/rdu3_internal/disk.cfg index 9b3186a23c..b87b7a51fb 100644 --- a/roles/nagios_server/files/nagios/services/rdu3_internal/disk.cfg +++ b/roles/nagios_server/files/nagios/services/rdu3_internal/disk.cfg @@ -1,77 +1,6 @@ -define service { - hostgroup_name all, !mincheckgrp, !logging_rdu3 - service_description Disk_Space_/ - check_command check_by_nrpe!check_disk_/ - use disktemplate -} - -define service { - hostgroup_name all, !mincheckgrp - service_description Disk Space /boot - check_command check_by_nrpe!check_disk_/boot - use disktemplate -} - -# TODO: uncomment when qahardware is back online -#define service { -# hostgroup_name qahardware -# service_description Disk Space /srv -# check_command check_by_nrpe!check_disk_/srv -# use disktemplate -#} - -define service { - host_name log01.rdu3.fedoraproject.org - service_description Disk space /var/log - check_command check_by_nrpe!check_disk_/var/log - use disktemplate -} - define service { hostgroup_name pkgs service_description Check read-only filesystem check_command check_by_nrpe!check_readonly_fs use disktemplate } - -define service { - hostgroup_name pkgs - service_description Disk space /srv/cache/lookaside - check_command check_by_nrpe!check_disk_/srv/cache/lookaside - use disktemplate -} - -define service { - hostgroup_name koji - service_description 
Disk space / - check_command check_by_nrpe!check_disk_/ - use ppc-secondarytemplate -} - -define service { - hostgroup_name retrace - service_description Disk space / - check_command check_by_nrpe!check_disk_/ - use retracetemplate -} - -define service { - hostgroup_name retrace - service_description Disk Space for huge /srv - check_command check_by_nrpe!check_disk_huge_/srv - use disktemplate -} - -define service { - hostgroup_name people - service_description Disk space /project - check_command check_by_nrpe!check_disk_/project/ - use disktemplate -} - -define service { - hostgroup_name oci_registry - service_description Disk space /srv/registry - check_command check_by_nrpe!check_disk_/srv/registry - use disktemplate -} diff --git a/roles/nagios_server/files/nagios/services/swap.cfg b/roles/nagios_server/files/nagios/services/swap.cfg index 6af9f9f8c2..e69de29bb2 100644 --- a/roles/nagios_server/files/nagios/services/swap.cfg +++ b/roles/nagios_server/files/nagios/services/swap.cfg @@ -1,14 +0,0 @@ -define service { - hostgroup CheckSwap - service_description Swap-Is-Low - check_command check_by_nrpe!check_swap - use criticaltemplate -} - -# TODO: uncomment once retrace exists again -#define service { -# hostgroup retrace -# service_description Swap -# check_command check_by_nrpe!check_swap -# use retracetemplate -#} diff --git a/roles/nagios_server/templates/nagios/services/mail_queue.cfg.j2 b/roles/nagios_server/templates/nagios/services/mail_queue.cfg.j2 index 05c6128785..f1e57fd4da 100644 --- a/roles/nagios_server/templates/nagios/services/mail_queue.cfg.j2 +++ b/roles/nagios_server/templates/nagios/services/mail_queue.cfg.j2 @@ -1,12 +1,4 @@ {% if nagios_location == "rdu3_internal" %} -define service { - hostgroup nomail - service_description mail_queue - check_command check_by_nrpe!check_postfix_queue - max_check_attempts 7 - use defaulttemplate -} - define service { host_name bastion01.{{datacenter}}.fedoraproject.org service_description mail_queue_redhat