mirror of
https://pagure.io/fedora-infra/ansible.git
synced 2026-03-20 03:57:02 +08:00
Zabbix: fleet-wide LLD monitoring for discovered RAID devices
Signed-off-by: Greg Sutcliffe <fedora@emeraldreverie.org>
This commit is contained in:
8
roles/base/files/mdraid/agent-raid.conf
Normal file
8
roles/base/files/mdraid/agent-raid.conf
Normal file
@@ -0,0 +1,8 @@
|
||||
# taken from https://github.com/pfoo/zabbix-mdraid
|
||||
UserParameter=mdraid.discovery,ls /sys/class/block/ | awk 'BEGIN{printf "{\"data\":["}; /^md[0-9]+$/ {printf c"{\"{#MDNAME}\":\""$1"\"}";c=","}; END{print "]}"}' 2>/dev/null
|
||||
UserParameter=mdraid.disks[*],cat /sys/block/$1/md/raid_disks
|
||||
UserParameter=mdraid.sync_status[*],cat /sys/block/$1/md/sync_action
|
||||
UserParameter=mdraid.degraded[*],cat /sys/block/$1/md/degraded
|
||||
UserParameter=mdraid.sync_speed[*],echo $(($(cat /sys/block/$1/md/sync_speed | sed s/none/0/g)*1024))
|
||||
UserParameter=mdraid.level[*],cat /sys/block/$1/md/level
|
||||
UserParameter=mdraid.state[*],cat /sys/block/$1/md/array_state
|
||||
214
roles/base/files/mdraid/template-mdraid.yml
Normal file
214
roles/base/files/mdraid/template-mdraid.yml
Normal file
@@ -0,0 +1,214 @@
|
||||
zabbix_export:
|
||||
version: '7.0'
|
||||
template_groups:
|
||||
- uuid: a333cbd6a3ad44baaa4eee4b0c0b1bec
|
||||
name: Fedora
|
||||
templates:
|
||||
- uuid: 208aa70635844c64892975c392bc8428
|
||||
template: 'MD RAID by active agent'
|
||||
name: 'MD RAID by active agent'
|
||||
description: 'Template for monitoring of Linux MD RAID (mdadm). Taken from https://github.com/pfoo/zabbix-mdraid'
|
||||
vendor:
|
||||
name: 'GitHub:pfoo'
|
||||
version: 7.0.3
|
||||
groups:
|
||||
- name: Fedora
|
||||
discovery_rules:
|
||||
- uuid: 2e14934d330d40c491670298a8804bad
|
||||
name: 'MD Raid discovery'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: mdraid.discovery
|
||||
delay: 1h
|
||||
lifetime: 30d
|
||||
enabled_lifetime_type: DISABLE_NEVER
|
||||
description: 'Discover every Linux MD Raid'
|
||||
item_prototypes:
|
||||
- uuid: 00f8b763c6844076ba4aa643b3d1230b
|
||||
name: 'MD Raid {#MDNAME} degraded disks'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.degraded[{#MDNAME}]'
|
||||
delay: 300s
|
||||
history: 90d
|
||||
trends: '0'
|
||||
description: 'Number of degraded disks in the array {#MDNAME}.'
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
trigger_prototypes:
|
||||
- uuid: 4fc495a2435d4f5bb337bb0958bfe6c5
|
||||
expression: 'last(/MD RAID by active agent/mdraid.degraded[{#MDNAME}])>0'
|
||||
name: 'MD Raid array {#MDNAME} is degraded on {HOST.NAME}'
|
||||
opdata: '{ITEM.VALUE} degraded disk'
|
||||
priority: HIGH
|
||||
description: 'One or more failing disk in array.'
|
||||
tags:
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- tag: scope
|
||||
value: failure
|
||||
- uuid: feb853f2c65c4eb6a25ea9dc9d289174
|
||||
name: 'MD Raid {#MDNAME} array size'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.disks[{#MDNAME}]'
|
||||
delay: 1h
|
||||
history: 90d
|
||||
trends: '0'
|
||||
units: disks
|
||||
description: 'Number of disks configured in the array {#MDNAME}.'
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
trigger_prototypes:
|
||||
- uuid: 6b86ee6fa06045848146b287ee279fa0
|
||||
expression: '(last(/MD RAID by active agent/mdraid.disks[{#MDNAME}],#1)<>last(/MD RAID by active agent/mdraid.disks[{#MDNAME}],#2))<>0'
|
||||
name: 'Number of disks in MD Raid array {#MDNAME} changed on {HOST.NAME}'
|
||||
priority: WARNING
|
||||
description: 'A disk was either removed or added.'
|
||||
tags:
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- tag: scope
|
||||
value: notice
|
||||
- uuid: 156f0a719aa049b591c12095df6e5eb9
|
||||
name: 'MD Raid {#MDNAME} array level'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.level[{#MDNAME}]'
|
||||
delay: 1h
|
||||
history: 90d
|
||||
value_type: CHAR
|
||||
trends: '0'
|
||||
description: 'Raid level for {#MDNAME} array.'
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- uuid: ed7ddbcee07345e882565e339e83a846
|
||||
name: 'MD Raid {#MDNAME} state'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.state[{#MDNAME}]'
|
||||
delay: 300s
|
||||
history: 90d
|
||||
value_type: CHAR
|
||||
trends: '0'
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- uuid: 18394549496948539720e33ae4fa1a71
|
||||
name: 'MD Raid {#MDNAME} sync speed'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.sync_speed[{#MDNAME}]'
|
||||
delay: 60s
|
||||
history: 90d
|
||||
trends: '0'
|
||||
units: B/s
|
||||
description: 'MD Raid {#MDNAME} sync speed in bytes/sec'
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- uuid: a41a38f383f644dc8f0fe346df477ae0
|
||||
name: 'MD Raid {#MDNAME} sync status'
|
||||
type: ZABBIX_ACTIVE
|
||||
key: 'mdraid.sync_status[{#MDNAME}]'
|
||||
delay: 60s
|
||||
history: 90d
|
||||
value_type: CHAR
|
||||
trends: '0'
|
||||
description: |
|
||||
MD Raid {#MDNAME} sync status :
|
||||
resync: redundancy is being recalculated after unclean shutdown or creation
|
||||
recover: a hot spare is being built to replace a failed/missing device
|
||||
idle: nothing is happening
|
||||
check: A full check of redundancy was requested and is happening. This reads all blocks and checks them. A repair may also happen for some raid levels.
|
||||
repair: A full check and repair is happening. This is similar to resync, but was requested by the user, and the write-intent bitmap is NOT used to optimise the process.
|
||||
tags:
|
||||
- tag: component
|
||||
value: storage
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
trigger_prototypes:
|
||||
- uuid: 54a71f844abb433f9605f012c43c3592
|
||||
expression: 'find(/MD RAID by active agent/mdraid.sync_status[{#MDNAME}],,"like","recover")=1'
|
||||
name: 'MD Raid array {#MDNAME} is in recovery mode on {HOST.NAME}'
|
||||
priority: INFO
|
||||
description: 'This means a hot spare is being built to replace a failed/missing device.'
|
||||
tags:
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- tag: scope
|
||||
value: performance
|
||||
- uuid: e6bfb530addb4c0db272a1031888138a
|
||||
expression: 'find(/MD RAID by active agent/mdraid.sync_status[{#MDNAME}],,"like","resync")=1'
|
||||
name: 'MD Raid array {#MDNAME} is syncing on {HOST.NAME}'
|
||||
priority: INFO
|
||||
description: 'This means redundancy is being recalculated after unclean shutdown.'
|
||||
tags:
|
||||
- tag: mdarray
|
||||
value: '{#MDNAME}'
|
||||
- tag: scope
|
||||
value: performance
|
||||
graph_prototypes:
|
||||
- uuid: f3934446edc944c4a3292dbe589cc2bd
|
||||
name: 'MD Raid {#MDNAME} degraded disks'
|
||||
graph_items:
|
||||
- color: 1A7C11
|
||||
item:
|
||||
host: 'MD RAID by active agent'
|
||||
key: 'mdraid.degraded[{#MDNAME}]'
|
||||
- uuid: 7b939f8d1120494980353a467965939c
|
||||
name: 'MD Raid {#MDNAME} sync speed'
|
||||
graph_items:
|
||||
- sortorder: '1'
|
||||
color: 1A7C11
|
||||
item:
|
||||
host: 'MD RAID by active agent'
|
||||
key: 'mdraid.sync_speed[{#MDNAME}]'
|
||||
tags:
|
||||
- tag: class
|
||||
value: os
|
||||
- tag: target
|
||||
value: linux
|
||||
dashboards:
|
||||
- uuid: 82cd634bac1f495d92dae860b332ad97
|
||||
name: 'MD Raid'
|
||||
pages:
|
||||
- widgets:
|
||||
- type: graphprototype
|
||||
width: '72'
|
||||
height: '5'
|
||||
fields:
|
||||
- type: INTEGER
|
||||
name: columns
|
||||
value: '1'
|
||||
- type: GRAPH_PROTOTYPE
|
||||
name: graphid
|
||||
value:
|
||||
host: 'MD RAID by active agent'
|
||||
name: 'MD Raid {#MDNAME} sync speed'
|
||||
- type: STRING
|
||||
name: reference
|
||||
value: AAAAL
|
||||
- type: graphprototype
|
||||
'y': '5'
|
||||
width: '72'
|
||||
height: '5'
|
||||
fields:
|
||||
- type: INTEGER
|
||||
name: columns
|
||||
value: '1'
|
||||
- type: GRAPH_PROTOTYPE
|
||||
name: graphid
|
||||
value:
|
||||
host: 'MD RAID by active agent'
|
||||
name: 'MD Raid {#MDNAME} degraded disks'
|
||||
- type: STRING
|
||||
name: reference
|
||||
value: AAAAM
|
||||
@@ -687,5 +687,14 @@
|
||||
- config
|
||||
- base
|
||||
|
||||
# This uses LLD discovery on md devices, and is a no-op if none are
|
||||
# found, so it should be safe to put everywhere, just in case.
|
||||
- name: Configure MD Raid monitoring in Zabbix
|
||||
ansible.builtin.include_tasks: mdraid-monitoring.yml
|
||||
tags:
|
||||
- mdraid
|
||||
- zabbix_agent
|
||||
- zabbix_api
|
||||
|
||||
- name: Setup Message of the Day (motd)
|
||||
import_tasks: motd.yml
|
||||
|
||||
42
roles/base/tasks/mdraid-monitoring.yml
Normal file
42
roles/base/tasks/mdraid-monitoring.yml
Normal file
@@ -0,0 +1,42 @@
|
||||
---
|
||||
# Monitoring config
|
||||
- name: Install Zabbix agent config drop-in
|
||||
ansible.builtin.copy:
|
||||
src: mdraid/agent-raid.conf
|
||||
dest: /etc/zabbix/zabbix_agentd.d/raid.conf
|
||||
mode: '0644'
|
||||
tags:
|
||||
- zabbix_agent
|
||||
- mdraid
|
||||
notify:
|
||||
- Restart zabbix agent
|
||||
|
||||
- name: Zabbix API Block
|
||||
vars:
|
||||
ansible_zabbix_auth_key: "{{ zabbix_auth_key }}"
|
||||
ansible_network_os: "{{ zabbix_network_os }}"
|
||||
ansible_connection: "{{ zabbix_connection }}"
|
||||
ansible_httpapi_port: "{{ zabbix_httpapi_port }}"
|
||||
ansible_httpapi_use_ssl: "{{ zabbix_httpapi_use_ssl }}"
|
||||
ansible_httpapi_validate_certs: "{{ zabbix_httpapi_validate_certs }}"
|
||||
ansible_host: "{{ zabbix_server }}"
|
||||
ansible_zabbix_url_path: "{{ zabbix_url_path }}"
|
||||
tags:
|
||||
- zabbix_api
|
||||
- mdraid
|
||||
block:
|
||||
- name: Import MD Raid template file
|
||||
community.zabbix.zabbix_template:
|
||||
template_yaml: "{{ lookup('file', 'mdraid/template-mdraid.yml') }}"
|
||||
state: present
|
||||
- name: Ensure MD Raid hostgroup is present
|
||||
community.zabbix.zabbix_group:
|
||||
host_groups:
|
||||
- MD Raid servers
|
||||
state: present
|
||||
- name: Add self to MD Raid in Zabbix
|
||||
community.zabbix.zabbix_host:
|
||||
host_name: "{{ inventory_hostname }}"
|
||||
host_groups: MD Raid servers
|
||||
link_templates: MD RAID by active agent
|
||||
force: false
|
||||
Reference in New Issue
Block a user