diff --git a/roles/base/files/mdraid/agent-raid.conf b/roles/base/files/mdraid/agent-raid.conf
new file mode 100644
index 0000000000..cb5de9f27e
--- /dev/null
+++ b/roles/base/files/mdraid/agent-raid.conf
@@ -0,0 +1,12 @@
+# taken from https://github.com/pfoo/zabbix-mdraid
+# Low-level discovery: print every /sys/class/block entry named md<N> as a {#MDNAME} JSON entry.
+UserParameter=mdraid.discovery,ls /sys/class/block/ | awk 'BEGIN{printf "{\"data\":["}; /^md[0-9]+$/ {printf c"{\"{#MDNAME}\":\""$1"\"}";c=","}; END{print "]}"}' 2>/dev/null
+# In the [*] keys below the agent replaces $1 with the first key parameter (the array name).
+UserParameter=mdraid.disks[*],cat /sys/block/$1/md/raid_disks
+UserParameter=mdraid.sync_status[*],cat /sys/block/$1/md/sync_action
+UserParameter=mdraid.degraded[*],cat /sys/block/$1/md/degraded
+# sync_speed can read "none" instead of a number; that is mapped to 0, and the
+# value is multiplied by 1024 to match the template's B/s unit.
+UserParameter=mdraid.sync_speed[*],echo $(($(cat /sys/block/$1/md/sync_speed | sed s/none/0/g)*1024))
+UserParameter=mdraid.level[*],cat /sys/block/$1/md/level
+UserParameter=mdraid.state[*],cat /sys/block/$1/md/array_state
diff --git a/roles/base/files/mdraid/template-mdraid.yml b/roles/base/files/mdraid/template-mdraid.yml
new file mode 100644
index 0000000000..79eb809b42
--- /dev/null
+++ b/roles/base/files/mdraid/template-mdraid.yml
@@ -0,0 +1,214 @@
+zabbix_export:
+  version: '7.0'
+  template_groups:
+    - uuid: a333cbd6a3ad44baaa4eee4b0c0b1bec
+      name: Fedora
+  templates:
+    - uuid: 208aa70635844c64892975c392bc8428
+      template: 'MD RAID by active agent'
+      name: 'MD RAID by active agent'
+      description: 'Template for monitoring of Linux MD RAID (mdadm). Taken from https://github.com/pfoo/zabbix-mdraid'
+      vendor:
+        name: 'GitHub:pfoo'
+        version: 7.0.3
+      groups:
+        - name: Fedora
+      discovery_rules:
+        - uuid: 2e14934d330d40c491670298a8804bad
+          name: 'MD Raid discovery'
+          type: ZABBIX_ACTIVE
+          key: mdraid.discovery
+          delay: 1h
+          lifetime: 30d
+          enabled_lifetime_type: DISABLE_NEVER
+          description: 'Discover every Linux MD Raid'
+          item_prototypes:
+            - uuid: 00f8b763c6844076ba4aa643b3d1230b
+              name: 'MD Raid {#MDNAME} degraded disks'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.degraded[{#MDNAME}]'
+              delay: 300s
+              history: 90d
+              trends: '0'
+              description: 'Number of degraded disks in the array {#MDNAME}.'
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+              trigger_prototypes:
+                - uuid: 4fc495a2435d4f5bb337bb0958bfe6c5
+                  expression: 'last(/MD RAID by active agent/mdraid.degraded[{#MDNAME}])>0'
+                  name: 'MD Raid array {#MDNAME} is degraded on {HOST.NAME}'
+                  opdata: '{ITEM.VALUE} degraded disk'
+                  priority: HIGH
+                  description: 'One or more failing disk in array.'
+                  tags:
+                    - tag: mdarray
+                      value: '{#MDNAME}'
+                    - tag: scope
+                      value: failure
+            - uuid: feb853f2c65c4eb6a25ea9dc9d289174
+              name: 'MD Raid {#MDNAME} array size'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.disks[{#MDNAME}]'
+              delay: 1h
+              history: 90d
+              trends: '0'
+              units: disks
+              description: 'Number of disks configured in the array {#MDNAME}.'
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+              trigger_prototypes:
+                - uuid: 6b86ee6fa06045848146b287ee279fa0
+                  expression: '(last(/MD RAID by active agent/mdraid.disks[{#MDNAME}],#1)<>last(/MD RAID by active agent/mdraid.disks[{#MDNAME}],#2))<>0'
+                  name: 'Number of disks in MD Raid array {#MDNAME} changed on {HOST.NAME}'
+                  priority: WARNING
+                  description: 'A disk was either removed or added.'
+                  tags:
+                    - tag: mdarray
+                      value: '{#MDNAME}'
+                    - tag: scope
+                      value: notice
+            - uuid: 156f0a719aa049b591c12095df6e5eb9
+              name: 'MD Raid {#MDNAME} array level'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.level[{#MDNAME}]'
+              delay: 1h
+              history: 90d
+              value_type: CHAR
+              trends: '0'
+              description: 'Raid level for {#MDNAME} array.'
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+            - uuid: ed7ddbcee07345e882565e339e83a846
+              name: 'MD Raid {#MDNAME} state'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.state[{#MDNAME}]'
+              delay: 300s
+              history: 90d
+              value_type: CHAR
+              trends: '0'
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+            - uuid: 18394549496948539720e33ae4fa1a71
+              name: 'MD Raid {#MDNAME} sync speed'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.sync_speed[{#MDNAME}]'
+              delay: 60s
+              history: 90d
+              trends: '0'
+              units: B/s
+              description: 'MD Raid {#MDNAME} sync speed in bytes/sec'
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+            - uuid: a41a38f383f644dc8f0fe346df477ae0
+              name: 'MD Raid {#MDNAME} sync status'
+              type: ZABBIX_ACTIVE
+              key: 'mdraid.sync_status[{#MDNAME}]'
+              delay: 60s
+              history: 90d
+              value_type: CHAR
+              trends: '0'
+              description: |
+                MD Raid {#MDNAME} sync status :
+                resync: redundancy is being recalculated after unclean shutdown or creation
+                recover: a hot spare is being built to replace a failed/missing device
+                idle: nothing is happening
+                check: A full check of redundancy was requested and is happening. This reads all blocks and checks them. A repair may also happen for some raid levels.
+                repair: A full check and repair is happening. This is similar to resync, but was requested by the user, and the write-intent bitmap is NOT used to optimise the process.
+              tags:
+                - tag: component
+                  value: storage
+                - tag: mdarray
+                  value: '{#MDNAME}'
+              trigger_prototypes:
+                - uuid: 54a71f844abb433f9605f012c43c3592
+                  expression: 'find(/MD RAID by active agent/mdraid.sync_status[{#MDNAME}],,"like","recover")=1'
+                  name: 'MD Raid array {#MDNAME} is in recovery mode on {HOST.NAME}'
+                  priority: INFO
+                  description: 'This means a hot spare is being built to replace a failed/missing device.'
+                  tags:
+                    - tag: mdarray
+                      value: '{#MDNAME}'
+                    - tag: scope
+                      value: performance
+                - uuid: e6bfb530addb4c0db272a1031888138a
+                  expression: 'find(/MD RAID by active agent/mdraid.sync_status[{#MDNAME}],,"like","resync")=1'
+                  name: 'MD Raid array {#MDNAME} is syncing on {HOST.NAME}'
+                  priority: INFO
+                  description: 'This means redundancy is being recalculated after unclean shutdown.'
+                  tags:
+                    - tag: mdarray
+                      value: '{#MDNAME}'
+                    - tag: scope
+                      value: performance
+          graph_prototypes:
+            - uuid: f3934446edc944c4a3292dbe589cc2bd
+              name: 'MD Raid {#MDNAME} degraded disks'
+              graph_items:
+                - color: 1A7C11
+                  item:
+                    host: 'MD RAID by active agent'
+                    key: 'mdraid.degraded[{#MDNAME}]'
+            - uuid: 7b939f8d1120494980353a467965939c
+              name: 'MD Raid {#MDNAME} sync speed'
+              graph_items:
+                - sortorder: '1'
+                  color: 1A7C11
+                  item:
+                    host: 'MD RAID by active agent'
+                    key: 'mdraid.sync_speed[{#MDNAME}]'
+      tags:
+        - tag: class
+          value: os
+        - tag: target
+          value: linux
+      dashboards:
+        - uuid: 82cd634bac1f495d92dae860b332ad97
+          name: 'MD Raid'
+          pages:
+            - widgets:
+                - type: graphprototype
+                  width: '72'
+                  height: '5'
+                  fields:
+                    - type: INTEGER
+                      name: columns
+                      value: '1'
+                    - type: GRAPH_PROTOTYPE
+                      name: graphid
+                      value:
+                        host: 'MD RAID by active agent'
+                        name: 'MD Raid {#MDNAME} sync speed'
+                    - type: STRING
+                      name: reference
+                      value: AAAAL
+                - type: graphprototype
+                  'y': '5'
+                  width: '72'
+                  height: '5'
+                  fields:
+                    - type: INTEGER
+                      name: columns
+                      value: '1'
+                    - type: GRAPH_PROTOTYPE
+                      name: graphid
+                      value:
+                        host: 'MD RAID by active agent'
+                        name: 'MD Raid {#MDNAME} degraded disks'
+                    - type: STRING
+                      name: reference
+                      value: AAAAM
diff --git a/roles/base/tasks/main.yml b/roles/base/tasks/main.yml
index 74c5e6d4d6..6c31d4e151 100644
--- a/roles/base/tasks/main.yml
+++ b/roles/base/tasks/main.yml
@@ -687,5 +687,14 @@
     - config
     - base
 
+# This uses LLD discovery on md devices, and is a no-op if none are
+# found, so it should be safe to put everywhere, just in case.
+- name: Configure MD Raid monitoring in Zabbix
+  ansible.builtin.include_tasks: mdraid-monitoring.yml
+  tags:
+    - mdraid
+    - zabbix_agent
+    - zabbix_api
+
 - name: Setup Message of the Day (motd)
   import_tasks: motd.yml
diff --git a/roles/base/tasks/mdraid-monitoring.yml b/roles/base/tasks/mdraid-monitoring.yml
new file mode 100644
index 0000000000..39bcb9aa16
--- /dev/null
+++ b/roles/base/tasks/mdraid-monitoring.yml
@@ -0,0 +1,50 @@
+---
+# Monitoring config
+# Installs the mdraid UserParameter drop-in for the local Zabbix agent and
+# notifies the "Restart zabbix agent" handler when the file changes.
+- name: Install Zabbix agent config drop-in
+  ansible.builtin.copy:
+    src: mdraid/agent-raid.conf
+    dest: /etc/zabbix/zabbix_agentd.d/raid.conf
+    mode: '0644'
+  tags:
+    - zabbix_agent
+    - mdraid
+  notify:
+    - Restart zabbix agent
+
+# The vars on this block override the connection settings for every task in
+# it, pointing ansible_host at zabbix_server for the community.zabbix modules.
+- name: Zabbix API Block
+  vars:
+    ansible_zabbix_auth_key: "{{ zabbix_auth_key }}"
+    ansible_network_os: "{{ zabbix_network_os }}"
+    ansible_connection: "{{ zabbix_connection }}"
+    ansible_httpapi_port: "{{ zabbix_httpapi_port }}"
+    ansible_httpapi_use_ssl: "{{ zabbix_httpapi_use_ssl }}"
+    ansible_httpapi_validate_certs: "{{ zabbix_httpapi_validate_certs }}"
+    ansible_host: "{{ zabbix_server }}"
+    ansible_zabbix_url_path: "{{ zabbix_url_path }}"
+  tags:
+    - zabbix_api
+    - mdraid
+  block:
+    - name: Import MD Raid template file
+      community.zabbix.zabbix_template:
+        template_yaml: "{{ lookup('file', 'mdraid/template-mdraid.yml') }}"
+        state: present
+    - name: Ensure MD Raid hostgroup is present
+      community.zabbix.zabbix_group:
+        host_groups:
+          - MD Raid servers
+        state: present
+    # NOTE(review): force: false presumably keeps groups/templates already
+    # assigned to the host instead of replacing them - confirm in module docs.
+    - name: Add self to MD Raid in Zabbix
+      community.zabbix.zabbix_host:
+        host_name: "{{ inventory_hostname }}"
+        host_groups:
+          - MD Raid servers
+        link_templates:
+          - MD RAID by active agent
+        force: false