From 7f60fdf69048eaef8439a8505df120e0eeaad43e Mon Sep 17 00:00:00 2001 From: Greg Sutcliffe Date: Mon, 31 Mar 2025 15:04:56 +0100 Subject: [PATCH] Zabbix-stg: More base server config This adds: - Matrix media type - User for a Matrix bot - Trigger using Matrix & the bot - PSK configuration, using the PSK file already deployed - 2 base templates - a general one suitable even for Koji - a dependant one for all other hosts - Autoregistration config to use the new base template This is all scoped to staging via a new include in main.yml --- inventory/group_vars/zabbix_stg | 1 + .../zabbix_server/files/matrix_mediatype.js | 188 ++ .../files/templates/linux_autoregister.yaml | 1711 +++++++++++++++++ .../files/templates/linux_hosts.yaml | 354 ++++ .../zabbix_server/tasks/configure_api.yml | 163 ++ roles/zabbix/zabbix_server/tasks/main.yml | 6 + 6 files changed, 2423 insertions(+) create mode 100644 roles/zabbix/zabbix_server/files/matrix_mediatype.js create mode 100644 roles/zabbix/zabbix_server/files/templates/linux_autoregister.yaml create mode 100644 roles/zabbix/zabbix_server/files/templates/linux_hosts.yaml create mode 100644 roles/zabbix/zabbix_server/tasks/configure_api.yml diff --git a/inventory/group_vars/zabbix_stg b/inventory/group_vars/zabbix_stg index 7dd16eeef0..45e8e6b26f 100644 --- a/inventory/group_vars/zabbix_stg +++ b/inventory/group_vars/zabbix_stg @@ -21,3 +21,4 @@ num_cpus: 2 # the host_vars/$hostname file tcp_ports: [80, 443] notes: Test instance for zabbix server +zabbix_stg_matrix_roomid: "!dODrizXNtqWjiylAyh:fedora.im" diff --git a/roles/zabbix/zabbix_server/files/matrix_mediatype.js b/roles/zabbix/zabbix_server/files/matrix_mediatype.js new file mode 100644 index 0000000000..0c28855e8e --- /dev/null +++ b/roles/zabbix/zabbix_server/files/matrix_mediatype.js @@ -0,0 +1,188 @@ +const required_input = [ + "matrix_url", + "matrix_room", + "matrix_token", + + "alert_subject", + "alert_message", + + "event_severity", + "event_is_problem", + 
"event_is_update", + + "enable_colors", + "enable_icons", +] + +const update_color = "#000000" +const recovery_color = "#098e68" +const severity_colors = [ + "#5a5a5a", // Not classified + "#2caed6", // Information + "#d6832c", // Warning + "#d6542c", // Average + "#d62c2c", // High + "#ff0000", // Disaster +] + +const update_icon = String.fromCodePoint("0x1f4dd") +const recovery_icon = String.fromCodePoint("0x2705") +const severity_icons = [ + String.fromCodePoint("0x2754"), // Not classified + String.fromCodePoint("0x2139"), // Information + String.fromCodePoint("0x26a0"), // Warning + String.fromCodePoint("0x274c"), // Average + String.fromCodePoint("0x1f525"), // High + String.fromCodePoint("0x1f4a5"), // Disaster +] + +var Matrix = { + validate: function (params) { + required_input.forEach(function (key) { + if (key in params && params[key] != undefined) { + Matrix[key] = params[key] + } else { + throw "Missing value for key: " + key + } + }) + + Matrix.alert_subject = Matrix.alert_subject.replace(/\r/g, "") + Matrix.alert_message = Matrix.alert_message.replace(/\r/g, "") + + Matrix.event_severity = parseInt(Matrix.event_severity) + Matrix.event_is_problem = parseInt(Matrix.event_is_problem) + Matrix.event_is_update = parseInt(Matrix.event_is_update) + + if (typeof params.event_url === "string" && params.event_url.trim() !== "") { + Matrix.event_url = params.event_url + } + + Matrix.enable_colors = Matrix.enable_colors.toLowerCase() == "true" + Matrix.enable_icons = Matrix.enable_icons.toLowerCase() == "true" + + if (typeof params.http_proxy === "string" && params.http_proxy.trim() !== "") { + Matrix.http_proxy = params.http_proxy + } + + if (Matrix.event_is_problem == 1) { + if (Matrix.event_is_update == 0) { + Matrix.kind = "problem" + Matrix.color = severity_colors[Matrix.event_severity] + Matrix.icon = severity_icons[Matrix.event_severity] + } else { + Matrix.kind = "update" + Matrix.color = update_color + Matrix.icon = update_icon + } + } else { + 
Matrix.kind = "recovery" + Matrix.color = recovery_color + Matrix.icon = recovery_icon + } + }, + + request: function (path, payload) { + var request = new HttpRequest() + request.addHeader("Content-Type: application/json") + request.addHeader("Authorization: Bearer " + Matrix.matrix_token) + + var url = Matrix.matrix_url + path + + Zabbix.Log(4, "[Matrix Webhook] new request to: " + url) + + if (Matrix.http_proxy != undefined) { + request.setProxy(Matrix.http_proxy) + } + + var blob = request.post(url, JSON.stringify(payload)) + + if (request.getStatus() !== 200) { + var resp = JSON.parse(blob) + + if (request.getStatus() == 403 && resp.error.indexOf("not in room") !== -1) { + throw "User is not in room" + } + + Zabbix.Log(4, "[Matrix Webhook] Request failed: " + resp.error) + throw "Request failed: " + request.getStatus() + " " + resp.error + } + }, + + joinRoom: function () { + Matrix.request("/_matrix/client/r0/rooms/" + Matrix.matrix_room + "/join", {}) + }, + + sendMessage: function () { + var body = "" + if (Matrix.enable_icons && Matrix.icon) { + body += Matrix.icon + " " + } + body += Matrix.alert_subject + "\n" + body += Matrix.alert_message + + if (Matrix.event_url != undefined) { + body += "\n" + Matrix.event_url + } + + var formatted_body = "" + if (Matrix.enable_colors) { + formatted_body += ''.replace("{color}", Matrix.color) + } else { + formatted_body += "" + } + + formatted_body += "" + if (Matrix.enable_icons && Matrix.icon) { + formatted_body += Matrix.icon + " " + } + + if (Matrix.event_url != undefined) { + formatted_body += ''.replace("{href}", Matrix.event_url) + } + + formatted_body += Matrix.alert_subject + + if (Matrix.event_url != undefined) { + formatted_body += "" + } + + formatted_body += "
" + + formatted_body += Matrix.alert_message.replace(/\n/g, "
") + formatted_body += "
" + + const payload = { + body: body, + msgtype: "m.notice", + format: "org.matrix.custom.html", + formatted_body: formatted_body, + } + + Matrix.request( + "/_matrix/client/r0/rooms/" + Matrix.matrix_room + "/send/m.room.message", + payload + ) + }, +} + +try { + var params = JSON.parse(value) + + Matrix.validate(params) + + try { + Matrix.sendMessage() + } catch (error) { + if (error == "User is not in room") { + Matrix.joinRoom() + Matrix.sendMessage() + } else { + throw error + } + } + + return "OK" +} catch (error) { + Zabbix.Log(4, "[Matrix Webhook] Error: " + error) + throw "Sending failed: " + error +} diff --git a/roles/zabbix/zabbix_server/files/templates/linux_autoregister.yaml b/roles/zabbix/zabbix_server/files/templates/linux_autoregister.yaml new file mode 100644 index 0000000000..0b324fc6cd --- /dev/null +++ b/roles/zabbix/zabbix_server/files/templates/linux_autoregister.yaml @@ -0,0 +1,1711 @@ +zabbix_export: + version: '7.0' + template_groups: + - uuid: a333cbd6a3ad44baaa4eee4b0c0b1bec + name: Fedora + templates: + - uuid: dc6cbc7e9bc74c7790bfabd70f307edc + template: 'Linux Autoregistration' + name: 'Linux Autoregistration' + description: | + Modified version of the Official Linux template from Zabbix. Disables all triggers but keeps all data - this is so we can inherit from here to both normal hosts & Koji builders. + + As such this template has CPU load, memory usage, and disk IO triggers disabled, and these are moved to "Linux Hosts". 
Koji builders are in "Koji Hosts" + groups: + - name: Fedora + items: + - uuid: ab5422c5f7ff479aae1409747a14ec3a + name: 'Linux: Host name of Zabbix agent running' + type: ZABBIX_ACTIVE + key: agent.hostname + delay: 1h + history: 7d + value_type: CHAR + trends: '0' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: system + - uuid: 0c448426a0c34cbfbd55b471dfeea4ee + name: 'Linux: Zabbix agent ping' + key: agent.ping + history: 7d + description: 'The agent always returns 1 for this item. It could be used in combination with nodata() for availability check.' + valuemap: + name: 'Zabbix agent ping status' + tags: + - tag: component + value: system + triggers: + - uuid: 59bc644bab6741eeb837647a07d9d9c6 + expression: 'nodata(/Linux Autoregistration/agent.ping,{$AGENT.NODATA_TIMEOUT})=1' + name: 'Linux: Zabbix agent is not available' + event_name: 'Linux: Zabbix agent is not available (or nodata for {$AGENT.NODATA_TIMEOUT})' + priority: AVERAGE + description: 'For active agents, nodata() with agent.ping is used with {$AGENT.NODATA_TIMEOUT} as time threshold.' + manual_close: 'YES' + tags: + - tag: scope + value: availability + - uuid: 3806e2bf8ce64003887752f6073240c5 + name: 'Linux: Version of Zabbix agent running' + type: ZABBIX_ACTIVE + key: agent.version + delay: 1h + history: 7d + value_type: CHAR + trends: '0' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: application + - uuid: 0ee427fcb9b34b5f97dd240b4a6fb398 + name: 'Linux: Maximum number of open file descriptors' + type: ZABBIX_ACTIVE + key: kernel.maxfiles + delay: 1h + history: 7d + description: 'It could be increased by using `sysctl` utility or modifying the file `/etc/sysctl.conf`.' 
+ preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: system + triggers: + - uuid: ea90a8a1ee1845a3bca4293618808627 + expression: 'last(/Linux Autoregistration/kernel.maxfiles)<{$KERNEL.MAXFILES.MIN}' + name: 'Linux: Configured max number of open filedescriptors is too low' + event_name: 'Linux: Configured max number of open filedescriptors is too low (< {$KERNEL.MAXFILES.MIN})' + priority: INFO + tags: + - tag: scope + value: performance + - uuid: 65e08232a1d64446879e37661e934145 + name: 'Linux: Maximum number of processes' + type: ZABBIX_ACTIVE + key: kernel.maxproc + delay: 1h + history: 7d + description: 'It could be increased by using `sysctl` utility or modifying the file `/etc/sysctl.conf`.' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: system + triggers: + - uuid: fc02937816ff44bd8be04dc472543c87 + expression: 'last(/Linux Autoregistration/kernel.maxproc)<{$KERNEL.MAXPROC.MIN}' + name: 'Linux: Configured max number of processes is too low' + event_name: 'Linux: Configured max number of processes is too low (< {$KERNEL.MAXPROC.MIN})' + priority: INFO + dependencies: + - name: 'Linux: Getting closer to process limit' + expression: 'last(/Linux Autoregistration/proc.num)/last(/Linux Autoregistration/kernel.maxproc)*100>80' + tags: + - tag: scope + value: performance + - uuid: b8e686a0cfac46b09eb165369677c4dd + name: 'Linux: Number of processes' + type: ZABBIX_ACTIVE + key: proc.num + history: 7d + tags: + - tag: component + value: system + - uuid: af6922e6db9a41a98088fa167550ed85 + name: 'Linux: Number of running processes' + type: ZABBIX_ACTIVE + key: 'proc.num[,,run]' + history: 7d + tags: + - tag: component + value: system + - uuid: a898c9da644d4dd38576e0799842a390 + name: 'Linux: System boot time' + type: ZABBIX_ACTIVE + key: system.boottime + delay: 15m + history: 7d + units: unixtime + preprocessing: + - type: 
DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1h + tags: + - tag: component + value: system + - uuid: 32acbff8a3bc4875933c0157ba56689c + name: 'Linux: Interrupts per second' + type: ZABBIX_ACTIVE + key: system.cpu.intr + history: 7d + value_type: FLOAT + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: cpu + - uuid: 604ff86e10f746508b894445580c841a + name: 'Linux: Load average (1m avg)' + type: ZABBIX_ACTIVE + key: 'system.cpu.load[all,avg1]' + history: 7d + value_type: FLOAT + tags: + - tag: component + value: cpu + - uuid: 5bc5f9e07d9b4986b9bb47cd15717abe + name: 'Linux: Load average (5m avg)' + type: ZABBIX_ACTIVE + key: 'system.cpu.load[all,avg5]' + history: 7d + value_type: FLOAT + tags: + - tag: component + value: cpu + - uuid: dd8d46411ba943939b57f8f803a0eb64 + name: 'Linux: Load average (15m avg)' + type: ZABBIX_ACTIVE + key: 'system.cpu.load[all,avg15]' + history: 7d + value_type: FLOAT + tags: + - tag: component + value: cpu + - uuid: 06a359ff37cc4d9bbef3ce3abeba69f1 + name: 'Linux: Number of CPUs' + type: ZABBIX_ACTIVE + key: system.cpu.num + history: 7d + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: cpu + - uuid: cab757c097a94db688d936253b668e25 + name: 'Linux: Context switches per second' + type: ZABBIX_ACTIVE + key: system.cpu.switches + history: 7d + value_type: FLOAT + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: cpu + - uuid: 935269141e144b239bb86c1bcbca953a + name: 'Linux: CPU utilization' + type: DEPENDENT + key: system.cpu.util + delay: '0' + history: 7d + value_type: FLOAT + units: '%' + description: 'The CPU utilization expressed in %.' 
+ preprocessing: + - type: JAVASCRIPT + parameters: + - | + //Calculate utilization + return (100 - value) + master_item: + key: 'system.cpu.util[,idle]' + tags: + - tag: component + value: cpu + - uuid: 1cbe996037a24256a5f5a4b6e33768b4 + name: 'Linux: CPU guest time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,guest]' + history: 7d + value_type: FLOAT + units: '%' + description: 'Guest time - the time spent on running a virtual CPU for a guest operating system.' + tags: + - tag: component + value: cpu + - uuid: 29fa67e6d28c4fada2be2bfff4fe0ce5 + name: 'Linux: CPU guest nice time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,guest_nice]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The time spent on running a niced guest (a virtual CPU for guest operating systems under the control of the Linux kernel).' + tags: + - tag: component + value: cpu + - uuid: f1c9646067214e5f845cc0d08f5f93b1 + name: 'Linux: CPU idle time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,idle]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The time the CPU has spent doing nothing.' + tags: + - tag: component + value: cpu + - uuid: 4da856f0e2f146f5b660161af66124ca + name: 'Linux: CPU interrupt time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,interrupt]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The amount of time the CPU has been servicing hardware interrupts.' + tags: + - tag: component + value: cpu + - uuid: 090ebc255efe46149a327626cb67cd26 + name: 'Linux: CPU iowait time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,iowait]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The amount of time the CPU has been waiting for I/O to complete.' 
+ tags: + - tag: component + value: cpu + - uuid: e34fe87c63f64678b0f7f379d7761a04 + name: 'Linux: CPU nice time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,nice]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The time the CPU has spent running users'' processes that have been niced.' + tags: + - tag: component + value: cpu + - uuid: 1bc60aea3c8d4dd2911afaf2e715fec5 + name: 'Linux: CPU softirq time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,softirq]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The amount of time the CPU has been servicing software interrupts.' + tags: + - tag: component + value: cpu + - uuid: f5b4f45595a04f248308cc1ad80527df + name: 'Linux: CPU steal time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,steal]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The amount of "stolen" CPU from this virtual machine by the hypervisor for other tasks, such as running another virtual machine.' + tags: + - tag: component + value: cpu + - uuid: e473b02aea4c41139466c4d4f31da638 + name: 'Linux: CPU system time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,system]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The time the CPU has spent running the kernel and its processes.' + tags: + - tag: component + value: cpu + - uuid: 64f2b21c5eda42e7bfde6e69b36944b9 + name: 'Linux: CPU user time' + type: ZABBIX_ACTIVE + key: 'system.cpu.util[,user]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The time the CPU has spent running users'' processes that are not niced.' + tags: + - tag: component + value: cpu + - uuid: a5ba228633bb464b9c69de9c897e2fcd + name: 'Linux: System name' + type: ZABBIX_ACTIVE + key: system.hostname + delay: 1h + history: 2w + value_type: CHAR + trends: '0' + description: 'The host name of the system.' 
+ inventory_link: NAME + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 12h + tags: + - tag: component + value: system + triggers: + - uuid: 23947e8645214a8db55913cbb68823aa + expression: 'change(/Linux Autoregistration/system.hostname) and length(last(/Linux Autoregistration/system.hostname))>0' + name: 'Linux: System name has changed' + event_name: 'Linux: System name has changed (new name: {ITEM.VALUE})' + priority: INFO + description: 'The name of the system has changed. Acknowledge to close the problem manually.' + manual_close: 'YES' + tags: + - tag: scope + value: notice + - tag: scope + value: security + - uuid: 2312d07aad7a494d8a4d2b999772a54d + name: 'Linux: System local time' + type: ZABBIX_ACTIVE + key: system.localtime + history: 7d + units: unixtime + description: 'The local system time of the host.' + tags: + - tag: component + value: system + triggers: + - uuid: 984dbf55e29b4a3ea8b99d32686b4fdc + expression: 'fuzzytime(/Linux Autoregistration/system.localtime,{$SYSTEM.FUZZYTIME.MAX})=0' + name: 'Linux: System time is out of sync' + event_name: 'Linux: System time is out of sync (diff with Zabbix server > {$SYSTEM.FUZZYTIME.MAX}s)' + priority: WARNING + description: 'The host''s system time is different from Zabbix server time.' + manual_close: 'YES' + tags: + - tag: scope + value: availability + - tag: scope + value: performance + - uuid: 2aa6ab3d2b594e44a01f3abcd49a6b9c + name: 'Linux: Operating system architecture' + type: ZABBIX_ACTIVE + key: system.sw.arch + delay: 1h + history: 2w + value_type: CHAR + trends: '0' + description: 'The architecture of the operating system.' 
+ preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: os + - uuid: c700dd57ff984fc6b1db8d3f065558a7 + name: 'Linux: Operating system' + type: ZABBIX_ACTIVE + key: system.sw.os + delay: 1h + history: 2w + value_type: CHAR + trends: '0' + inventory_link: OS + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: os + triggers: + - uuid: 2ea848f2321346d1a35555df36dbf44f + expression: 'change(/Linux Autoregistration/system.sw.os) and length(last(/Linux Autoregistration/system.sw.os))>0' + name: 'Linux: Operating system description has changed' + priority: INFO + description: 'The description of the operating system has changed. Possible reasons are that the system has been updated or replaced. Acknowledge to close the problem manually.' + manual_close: 'YES' + dependencies: + - name: 'Linux: System name has changed' + expression: 'change(/Linux Autoregistration/system.hostname) and length(last(/Linux Autoregistration/system.hostname))>0' + tags: + - tag: scope + value: notice + - uuid: 63d77e13e3b5471ca71c3bbdd3437bc8 + name: 'Linux: Software installed' + type: ZABBIX_ACTIVE + key: system.sw.packages + delay: 1h + history: 2w + value_type: TEXT + trends: '0' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: os + - uuid: 120b11ae917842bbbcb4639879679937 + name: 'Linux: Free swap space' + type: ZABBIX_ACTIVE + key: 'system.swap.size[,free]' + history: 7d + units: B + description: 'The free space of the swap volume/file expressed in bytes.' + tags: + - tag: component + value: memory + - tag: component + value: storage + - uuid: d57e348e595c4220b2d30d5efddda394 + name: 'Linux: Free swap space in %' + type: ZABBIX_ACTIVE + key: 'system.swap.size[,pfree]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The free space of the swap volume/file expressed in %.' 
+ tags: + - tag: component + value: memory + - tag: component + value: storage + - uuid: a66dca03cd40429085d762c184768a3b + name: 'Linux: Total swap space' + type: ZABBIX_ACTIVE + key: 'system.swap.size[,total]' + history: 7d + units: B + description: 'The total space of the swap volume/file expressed in bytes.' + tags: + - tag: component + value: memory + - tag: component + value: storage + - uuid: bffb666a5a1a4b069a7436e10a4f2c32 + name: 'Linux: System description' + type: ZABBIX_ACTIVE + key: system.uname + delay: 15m + history: 2w + value_type: CHAR + trends: '0' + description: 'The information as normally returned by `uname -a`.' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 12h + tags: + - tag: component + value: system + - uuid: 97ea109c46ba4c39a3c59b5cd62f25e1 + name: 'Linux: System uptime' + type: ZABBIX_ACTIVE + key: system.uptime + delay: 30s + history: 2w + trends: '0' + units: uptime + description: 'The system uptime expressed in the following format: "N days, hh:mm:ss".' + tags: + - tag: component + value: system + triggers: + - uuid: 2936ced8f75348fbb23def3000c5b9cc + expression: 'last(/Linux Autoregistration/system.uptime)<10m' + name: 'Linux: {HOST.NAME} has been restarted' + event_name: 'Linux: {HOST.NAME} has been restarted (uptime < 10m)' + priority: WARNING + description: 'The host uptime is less than 10 minutes.' + manual_close: 'YES' + tags: + - tag: scope + value: notice + - uuid: e5b84a42b3b347d7bd6d057d08285944 + name: 'Linux: Number of logged in users' + type: ZABBIX_ACTIVE + key: system.users.num + history: 7d + description: 'The number of users who are currently logged in.' 
+ tags: + - tag: component + value: environment + - uuid: 1c3a3e0429b24c6e85999d767d55ff5e + name: 'Linux: Checksum of /etc/passwd' + type: ZABBIX_ACTIVE + key: 'vfs.file.cksum[/etc/passwd,sha256]' + delay: 15m + history: 7d + value_type: CHAR + trends: '0' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1h + tags: + - tag: component + value: security + triggers: + - uuid: 57df5f0d3a3d4982a534bef76c28e0d2 + expression: 'last(/Linux Autoregistration/vfs.file.cksum[/etc/passwd,sha256],#1)<>last(/Linux Autoregistration/vfs.file.cksum[/etc/passwd,sha256],#2)' + name: 'Linux: /etc/passwd has been changed' + priority: INFO + manual_close: 'YES' + dependencies: + - name: 'Linux: Operating system description has changed' + expression: 'change(/Linux Autoregistration/system.sw.os) and length(last(/Linux Autoregistration/system.sw.os))>0' + - name: 'Linux: System name has changed' + expression: 'change(/Linux Autoregistration/system.hostname) and length(last(/Linux Autoregistration/system.hostname))>0' + tags: + - tag: scope + value: security + - uuid: cc26eaa48c2f46419c710bdba794aca7 + name: 'Linux: Available memory' + type: ZABBIX_ACTIVE + key: 'vm.memory.size[available]' + history: 7d + units: B + description: | + The available memory: + - in Linux - available = free + buffers + cache; + - on other platforms calculation may vary. + + See also Appendixes in Zabbix Documentation about parameters of the `vm.memory.size` item. + tags: + - tag: component + value: memory + - uuid: e2bd96f9c89d4b2eba7930c056dee196 + name: 'Linux: Available memory in %' + type: ZABBIX_ACTIVE + key: 'vm.memory.size[pavailable]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The available memory as percentage of the total. See also Appendixes in Zabbix Documentation about parameters of the `vm.memory.size` item.' 
+ tags: + - tag: component + value: memory + - uuid: 92d4b8f6b19e46b7a3a65dd7a3e3bb72 + name: 'Linux: Total memory' + type: ZABBIX_ACTIVE + key: 'vm.memory.size[total]' + history: 7d + units: B + description: 'The total memory expressed in bytes.' + tags: + - tag: component + value: memory + - uuid: 8428171130844cadba5e8caa52bb2cfb + name: 'Linux: Memory utilization' + type: DEPENDENT + key: vm.memory.utilization + delay: '0' + history: 7d + value_type: FLOAT + units: '%' + description: 'The percentage of used memory is calculated as `100-pavailable`.' + preprocessing: + - type: JAVASCRIPT + parameters: + - 'return (100-value);' + master_item: + key: 'vm.memory.size[pavailable]' + tags: + - tag: component + value: memory + discovery_rules: + - uuid: ef7dc53425a54faf99580901139ac7fd + name: 'Network interface discovery' + type: ZABBIX_ACTIVE + key: net.if.discovery + delay: 1h + filter: + evaltype: AND + conditions: + - macro: '{#IFNAME}' + value: '{$NET.IF.IFNAME.MATCHES}' + formulaid: A + - macro: '{#IFNAME}' + value: '{$NET.IF.IFNAME.NOT_MATCHES}' + operator: NOT_MATCHES_REGEX + formulaid: B + lifetime: 30d + enabled_lifetime_type: DISABLE_NEVER + description: 'The discovery of network interfaces.' 
+ item_prototypes: + - uuid: 7f2d829d18db45a791b9fe15d85a3351 + name: 'Interface {#IFNAME}: Inbound packets discarded' + type: ZABBIX_ACTIVE + key: 'net.if.in["{#IFNAME}",dropped]' + delay: 3m + history: 7d + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: 363e1751f7ee4e0ca9948b0f0093c080 + name: 'Interface {#IFNAME}: Inbound packets with errors' + type: ZABBIX_ACTIVE + key: 'net.if.in["{#IFNAME}",errors]' + delay: 3m + history: 7d + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: 9e46a99d948f433782d31f4dace07cf3 + name: 'Interface {#IFNAME}: Bits received' + type: ZABBIX_ACTIVE + key: 'net.if.in["{#IFNAME}"]' + delay: 3m + history: 7d + units: bps + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + - type: MULTIPLIER + parameters: + - '8' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: a080109a37274355afed146cc3839b4f + name: 'Interface {#IFNAME}: Outbound packets discarded' + type: ZABBIX_ACTIVE + key: 'net.if.out["{#IFNAME}",dropped]' + delay: 3m + history: 7d + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: 7e82e450f0894571bdb3bd32d986bcc2 + name: 'Interface {#IFNAME}: Outbound packets with errors' + type: ZABBIX_ACTIVE + key: 'net.if.out["{#IFNAME}",errors]' + delay: 3m + history: 7d + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: 9bcc9a2c264a4bec80c0e3e5e5e59291 + name: 'Interface {#IFNAME}: Bits sent' + type: ZABBIX_ACTIVE + key: 'net.if.out["{#IFNAME}"]' + delay: 3m + history: 7d + units: bps + preprocessing: + - type: CHANGE_PER_SECOND + parameters: + - '' + - 
type: MULTIPLIER + parameters: + - '8' + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: a9a1e32e82d44256aaa2ab21b968d66a + name: 'Interface {#IFNAME}: Operational status' + type: ZABBIX_ACTIVE + key: 'vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"]' + history: 7d + trends: '0' + description: 'Reference: https://www.kernel.org/doc/Documentation/networking/operstates.txt' + valuemap: + name: ifOperStatus + preprocessing: + - type: JAVASCRIPT + parameters: + - | + var newvalue; + switch(value) { + case "unknown": + newvalue = 0; + break; + case "notpresent": + newvalue = 1; + break; + case "down": + newvalue = 2; + break; + case "lowerlayerdown": + newvalue = 3; + break; + case "testing": + newvalue = 4; + break; + case "dormant": + newvalue = 5; + break; + case "up": + newvalue = 6; + break; default: + newvalue = "Problem parsing interface operstate in JS"; + } + return newvalue; + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + trigger_prototypes: + - uuid: 0fa8c8ab83d840bab1b71067088277a7 + expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])=2 and (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#1)<>last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#2))' + recovery_mode: RECOVERY_EXPRESSION + recovery_expression: 'last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0' + name: 'Interface {#IFNAME}: Link down' + opdata: 'Current state: {ITEM.LASTVALUE1}' + priority: AVERAGE + description: | + This trigger expression works as follows: + 1. It can be triggered if the operations status is down. + 2. `{$IFCONTROL:"{#IFNAME}"}=1` - a user can redefine context macro to value - 0. That marks this interface as not important. 
No new trigger will be fired if this interface is down. + 3. `{TEMPLATE_NAME:METRIC.diff()}=1` - the trigger fires only if the operational status was up to (1) sometime before (so, do not fire for the 'eternal off' interfaces.) + + WARNING: if closed manually - it will not fire again on the next poll, because of .diff. + manual_close: 'YES' + tags: + - tag: scope + value: availability + - uuid: 7577169d4afa4474aa8653142db36dd6 + name: 'Interface {#IFNAME}: Speed' + type: ZABBIX_ACTIVE + key: 'vfs.file.contents["/sys/class/net/{#IFNAME}/speed"]' + delay: 5m + history: 7d + trends: '0' + units: bps + description: | + It indicates the latest or current speed value of the interface. The value is an integer representing the link speed expressed in bits/sec. + This attribute is only valid for the interfaces that implement the ethtool `get_link_ksettings` method (mostly Ethernet). + + Reference: https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net + preprocessing: + - type: MULTIPLIER + parameters: + - '1000000' + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1h + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + - uuid: 6805e2bc864d4b8e8a7c607ce82436a4 + name: 'Interface {#IFNAME}: Interface type' + type: ZABBIX_ACTIVE + key: 'vfs.file.contents["/sys/class/net/{#IFNAME}/type"]' + delay: 1h + history: 7d + trends: '0' + description: | + It indicates the interface protocol type as a decimal value. + See `include/uapi/linux/if_arp.h` for all possible values. 
+ Reference: https://www.kernel.org/doc/Documentation/ABI/testing/sysfs-class-net + valuemap: + name: 'Linux::Interface protocol types' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1d + tags: + - tag: component + value: network + - tag: interface + value: '{#IFNAME}' + trigger_prototypes: + - uuid: e1406e1962d3456a9498a21fadc44ffa + expression: | + change(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"])<0 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"])>0 + and + (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/type"])=6 or last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/type"])=1) + and + (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])<>2) + recovery_mode: RECOVERY_EXPRESSION + recovery_expression: | + (change(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"])>0 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"],#2)>0) or + (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])=2) + name: 'Interface {#IFNAME}: Ethernet has changed to lower speed than it was before' + opdata: 'Current reported speed: {ITEM.LASTVALUE1}' + priority: INFO + description: 'This Ethernet connection has transitioned down from its known maximum speed. This might be a sign of autonegotiation issues. Acknowledge to close the problem manually.' 
+ manual_close: 'YES' + dependencies: + - name: 'Interface {#IFNAME}: Link down' + expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])=2 and (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#1)<>last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#2))' + recovery_expression: 'last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0' + tags: + - tag: scope + value: performance + - uuid: 2006632b358c41208616eef32788b124 + expression: | + (avg(/Linux Autoregistration/net.if.in["{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"]) or + avg(/Linux Autoregistration/net.if.out["{#IFNAME}"],15m)>({$IF.UTIL.MAX:"{#IFNAME}"}/100)*last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"])) and + last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"])>0 + recovery_mode: RECOVERY_EXPRESSION + recovery_expression: | + avg(/Linux Autoregistration/net.if.in["{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"]) and + avg(/Linux Autoregistration/net.if.out["{#IFNAME}"],15m)<(({$IF.UTIL.MAX:"{#IFNAME}"}-3)/100)*last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/speed"]) + name: 'Interface {#IFNAME}: High bandwidth usage' + event_name: 'Interface {#IFNAME}: High bandwidth usage (>{$IF.UTIL.MAX:"{#IFNAME}"}%)' + opdata: 'In: {ITEM.LASTVALUE1}, out: {ITEM.LASTVALUE3}, speed: {ITEM.LASTVALUE2}' + status: DISABLED + discover: NO_DISCOVER + priority: INFO + description: 'The utilization of the network interface is close to its estimated maximum bandwidth.' 
+ manual_close: 'YES' + dependencies: + - name: 'Interface {#IFNAME}: Link down' + expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])=2 and (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#1)<>last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#2))' + recovery_expression: 'last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0' + tags: + - tag: scope + value: performance + - uuid: dfae7ae9ce734081a61115f64613c7f7 + expression: | + min(/Linux Autoregistration/net.if.in["{#IFNAME}",errors],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"} + or min(/Linux Autoregistration/net.if.out["{#IFNAME}",errors],5m)>{$IF.ERRORS.WARN:"{#IFNAME}"} + recovery_mode: RECOVERY_EXPRESSION + recovery_expression: | + max(/Linux Autoregistration/net.if.in["{#IFNAME}",errors],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8 + and max(/Linux Autoregistration/net.if.out["{#IFNAME}",errors],5m)<{$IF.ERRORS.WARN:"{#IFNAME}"}*0.8 + name: 'Interface {#IFNAME}: High error rate' + event_name: 'Interface {#IFNAME}: High error rate (>{$IF.ERRORS.WARN:"{#IFNAME}"} for 5m)' + opdata: 'errors in: {ITEM.LASTVALUE1}, errors out: {ITEM.LASTVALUE2}' + priority: WARNING + description: 'It recovers when it is below 80% of the `{$IF.ERRORS.WARN:"{#IFNAME}"}` threshold.' 
+ manual_close: 'YES' + dependencies: + - name: 'Interface {#IFNAME}: Link down' + expression: '{$IFCONTROL:"{#IFNAME}"}=1 and last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])=2 and (last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#1)<>last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"],#2))' + recovery_expression: 'last(/Linux Autoregistration/vfs.file.contents["/sys/class/net/{#IFNAME}/operstate"])<>2 or {$IFCONTROL:"{#IFNAME}"}=0' + tags: + - tag: scope + value: availability + - tag: scope + value: performance + graph_prototypes: + - uuid: 1ad5cc48ad6345b5bd0770b6f05b87f8 + name: 'Interface {#IFNAME}: Network traffic' + graph_items: + - drawtype: GRADIENT_LINE + color: 199C0D + item: + host: 'Linux Autoregistration' + key: 'net.if.in["{#IFNAME}"]' + - sortorder: '1' + drawtype: BOLD_LINE + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'net.if.out["{#IFNAME}"]' + - sortorder: '2' + color: 00611C + yaxisside: RIGHT + item: + host: 'Linux Autoregistration' + key: 'net.if.out["{#IFNAME}",errors]' + - sortorder: '3' + color: F7941D + yaxisside: RIGHT + item: + host: 'Linux Autoregistration' + key: 'net.if.in["{#IFNAME}",errors]' + - sortorder: '4' + color: FC6EA3 + yaxisside: RIGHT + item: + host: 'Linux Autoregistration' + key: 'net.if.out["{#IFNAME}",dropped]' + - sortorder: '5' + color: 6C59DC + yaxisside: RIGHT + item: + host: 'Linux Autoregistration' + key: 'net.if.in["{#IFNAME}",dropped]' + - uuid: 7f6e71ed8978492db44484f014de2017 + name: 'Mounted filesystem discovery' + type: ZABBIX_ACTIVE + key: vfs.fs.discovery + delay: 1h + filter: + evaltype: AND + conditions: + - macro: '{#FSNAME}' + value: '{$VFS.FS.FSNAME.MATCHES}' + formulaid: A + - macro: '{#FSNAME}' + value: '{$VFS.FS.FSNAME.NOT_MATCHES}' + operator: NOT_MATCHES_REGEX + formulaid: B + - macro: '{#FSTYPE}' + value: '{$VFS.FS.FSTYPE.MATCHES}' + formulaid: C + - macro: '{#FSTYPE}' + 
value: '{$VFS.FS.FSTYPE.NOT_MATCHES}' + operator: NOT_MATCHES_REGEX + formulaid: D + lifetime: 30d + enabled_lifetime_type: DISABLE_NEVER + description: 'The discovery of mounted filesystems with different types.' + item_prototypes: + - uuid: a9428bdc83b04953bb2bbbb319a0deb2 + name: '{#FSNAME}: Free inodes in %' + type: ZABBIX_ACTIVE + key: 'vfs.fs.inode[{#FSNAME},pfree]' + history: 7d + value_type: FLOAT + units: '%' + tags: + - tag: component + value: storage + - tag: filesystem + value: '{#FSNAME}' + trigger_prototypes: + - uuid: 6c18c24b1ce443628c7cf12e51089b59 + expression: 'min(/Linux Autoregistration/vfs.fs.inode[{#FSNAME},pfree],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}' + name: '{#FSNAME}: Running out of free inodes' + event_name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}%)' + opdata: 'Free inodes: {ITEM.LASTVALUE1}' + priority: AVERAGE + description: | + It may become impossible to write to a disk if there are no index nodes left. + The following error messages may be returned as symptoms, even though the free space is available: + - 'No space left on device'; + - 'Disk is full'. + tags: + - tag: scope + value: capacity + - tag: scope + value: performance + - uuid: fb191ce3a8c74077b64c5ac8facddaf3 + expression: 'min(/Linux Autoregistration/vfs.fs.inode[{#FSNAME},pfree],5m)<{$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}' + name: '{#FSNAME}: Running out of free inodes' + event_name: '{#FSNAME}: Running out of free inodes (free < {$VFS.FS.INODE.PFREE.MIN.WARN:"{#FSNAME}"}%)' + opdata: 'Free inodes: {ITEM.LASTVALUE1}' + priority: WARNING + description: | + It may become impossible to write to a disk if there are no index nodes left. + The following error messages may be returned as symptoms, even though the free space is available: + - 'No space left on device'; + - 'Disk is full'. 
+ dependencies: + - name: '{#FSNAME}: Running out of free inodes' + expression: 'min(/Linux Autoregistration/vfs.fs.inode[{#FSNAME},pfree],5m)<{$VFS.FS.INODE.PFREE.MIN.CRIT:"{#FSNAME}"}' + tags: + - tag: scope + value: capacity + - tag: scope + value: performance + - uuid: 4737ba012a634505ad9bd355f93d4a2e + name: '{#FSNAME}: Space utilization' + type: ZABBIX_ACTIVE + key: 'vfs.fs.size[{#FSNAME},pused]' + history: 7d + value_type: FLOAT + units: '%' + description: 'The space utilization expressed in % for {#FSNAME}.' + tags: + - tag: component + value: storage + - tag: filesystem + value: '{#FSNAME}' + - uuid: 42167a72d98944e7964a54c8947d12f4 + name: '{#FSNAME}: Total space' + type: ZABBIX_ACTIVE + key: 'vfs.fs.size[{#FSNAME},total]' + history: 7d + units: B + description: 'Total space in bytes' + tags: + - tag: component + value: storage + - tag: filesystem + value: '{#FSNAME}' + - uuid: 10fc6306994c41ee8fccf73a11017ff0 + name: '{#FSNAME}: Used space' + type: ZABBIX_ACTIVE + key: 'vfs.fs.size[{#FSNAME},used]' + history: 7d + units: B + description: 'Used storage in bytes' + tags: + - tag: component + value: storage + - tag: filesystem + value: '{#FSNAME}' + trigger_prototypes: + - uuid: 3da6ee62e5f747cb9861856176d87fe0 + expression: | + last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and + ((last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},total])-last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},used]))<{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"} or timeleft(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused],1h,100)<1d) + name: '{#FSNAME}: Disk space is critically low' + event_name: '{#FSNAME}: Disk space is critically low (used > {$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}%)' + opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})' + priority: AVERAGE + description: | + Two conditions should match: + 1. 
The first condition - utilization of the space should be above `{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"}`. + 2. The second condition should be one of the following: + - the disk free space is less than `{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"}`; + - the disk will be full in less than 24 hours. + manual_close: 'YES' + tags: + - tag: scope + value: availability + - tag: scope + value: capacity + - uuid: aa439d4cd32f494798d38262dc3630de + expression: | + last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused])>{$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"} and + ((last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},total])-last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},used]))<{$VFS.FS.FREE.MIN.WARN:"{#FSNAME}"} or timeleft(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused],1h,100)<1d) + name: '{#FSNAME}: Disk space is low' + event_name: '{#FSNAME}: Disk space is low (used > {$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}%)' + opdata: 'Space used: {ITEM.LASTVALUE3} of {ITEM.LASTVALUE2} ({ITEM.LASTVALUE1})' + priority: WARNING + description: | + Two conditions should match: + 1. The first condition - utilization of the space should be above `{$VFS.FS.PUSED.MAX.WARN:"{#FSNAME}"}`. + 2. The second condition should be one of the following: + - the disk free space is less than `{$VFS.FS.FREE.MIN.WARN:"{#FSNAME}"}`; + - the disk will be full in less than 24 hours. 
+ manual_close: 'YES' + dependencies: + - name: '{#FSNAME}: Disk space is critically low' + expression: | + last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused])>{$VFS.FS.PUSED.MAX.CRIT:"{#FSNAME}"} and + ((last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},total])-last(/Linux Autoregistration/vfs.fs.size[{#FSNAME},used]))<{$VFS.FS.FREE.MIN.CRIT:"{#FSNAME}"} or timeleft(/Linux Autoregistration/vfs.fs.size[{#FSNAME},pused],1h,100)<1d) + tags: + - tag: scope + value: availability + - tag: scope + value: capacity + graph_prototypes: + - uuid: 67c7a04121f3459daea4679a668aa171 + name: '{#FSNAME}: Disk space usage' + width: '600' + height: '340' + type: PIE + show_3d: 'YES' + graph_items: + - color: '969696' + calc_fnc: LAST + type: GRAPH_SUM + item: + host: 'Linux Autoregistration' + key: 'vfs.fs.size[{#FSNAME},total]' + - sortorder: '1' + color: C80000 + calc_fnc: LAST + item: + host: 'Linux Autoregistration' + key: 'vfs.fs.size[{#FSNAME},used]' + overrides: + - name: 'Skip metadata collection for dynamic FS' + step: '1' + filter: + conditions: + - macro: '{#FSTYPE}' + value: ^(btrfs|zfs)$ + formulaid: A + operations: + - operationobject: ITEM_PROTOTYPE + operator: LIKE + value: inode + discover: NO_DISCOVER + tags: + - tag: class + value: os + - tag: target + value: linux + macros: + - macro: '{$AGENT.NODATA_TIMEOUT}' + value: 30m + description: 'No data timeout for active agents. Consider to keep it relatively high.' + - macro: '{$CPU.UTIL.CRIT}' + value: '90' + - macro: '{$IF.ERRORS.WARN}' + value: '2' + - macro: '{$IF.UTIL.MAX}' + value: '90' + description: 'This macro is used as a threshold in the interface utilization trigger.' + - macro: '{$IFCONTROL}' + value: '1' + - macro: '{$KERNEL.MAXFILES.MIN}' + value: '256' + - macro: '{$KERNEL.MAXPROC.MIN}' + value: '1024' + - macro: '{$LOAD_AVG_PER_CPU.MAX.WARN}' + value: '1.5' + description: 'The CPU load per core is considered sustainable. If necessary, it can be tuned.' 
+ - macro: '{$MEMORY.AVAILABLE.MIN}' + value: 20M + description: 'This macro is used as a threshold in the memory available trigger.' + - macro: '{$MEMORY.UTIL.MAX}' + value: '90' + description: 'This macro is used as a threshold in the memory utilization trigger.' + - macro: '{$NET.IF.IFNAME.MATCHES}' + value: '^.*$' + - macro: '{$NET.IF.IFNAME.NOT_MATCHES}' + value: '(^Software Loopback Interface|^NULL[0-9.]*$|^[Ll]o[0-9.]*$|^[Ss]ystem$|^Nu[0-9.]*$|^veth[0-9A-z]+$|docker[0-9]+|br-[a-z0-9]{12})' + description: 'It filters out `loopbacks`, `nulls`, `docker veth` links and `docker0 bridge` by default.' + - macro: '{$SWAP.PFREE.MIN.WARN}' + value: '50' + - macro: '{$SYSTEM.FUZZYTIME.MAX}' + value: '60' + - macro: '{$VFS.DEV.DEVNAME.MATCHES}' + value: .+ + description: 'This macro is used for a discovery of block devices. It can be overridden on host level or its linked template level.' + - macro: '{$VFS.DEV.DEVNAME.NOT_MATCHES}' + value: '^(loop[0-9]*|sd[a-z][0-9]+|nbd[0-9]+|sr[0-9]+|fd[0-9]+|dm-[0-9]+|ram[0-9]+|ploop[a-z0-9]+|md[0-9]*|hcp[0-9]*|zram[0-9]*)' + description: 'This macro is used for a discovery of block devices. It can be overridden on host level or its linked template level.' + - macro: '{$VFS.DEV.READ.AWAIT.WARN}' + value: '20' + description: 'The average response time (in ms) of disk read before the trigger would fire.' + - macro: '{$VFS.DEV.WRITE.AWAIT.WARN}' + value: '20' + description: 'The average response time (in ms) of disk write before the trigger would fire.' + - macro: '{$VFS.FS.FREE.MIN.CRIT}' + value: 5G + description: 'The critical threshold for utilization of the filesystem.' + - macro: '{$VFS.FS.FREE.MIN.WARN}' + value: 10G + description: 'The warning threshold for utilization of the filesystem.' + - macro: '{$VFS.FS.FSNAME.MATCHES}' + value: .+ + description: 'This macro is used for discovery of the filesystems. It can be overridden on host level or its linked template level.' 
+ - macro: '{$VFS.FS.FSNAME.NOT_MATCHES}' + value: ^(/dev|/sys|/run|/proc|.+/shm$) + description: 'This macro is used for discovery of the filesystems. It can be overridden on host level or its linked template level.' + - macro: '{$VFS.FS.FSTYPE.MATCHES}' + value: ^(btrfs|ext2|ext3|ext4|reiser|xfs|ffs|ufs|jfs|jfs2|vxfs|hfs|apfs|refs|ntfs|fat32|zfs)$ + description: 'This macro is used for discovery of the filesystems. It can be overridden on host level or its linked template level.' + - macro: '{$VFS.FS.FSTYPE.NOT_MATCHES}' + value: ^\s$ + description: 'This macro is used for discovery of the filesystems. It can be overridden on host level or its linked template level.' + - macro: '{$VFS.FS.INODE.PFREE.MIN.CRIT}' + value: '10' + description: 'The critical threshold of the filesystem metadata utilization.' + - macro: '{$VFS.FS.INODE.PFREE.MIN.WARN}' + value: '20' + description: 'The warning threshold of the filesystem metadata utilization.' + - macro: '{$VFS.FS.PUSED.MAX.CRIT}' + value: '90' + description: 'The critical threshold of the filesystem utilization.' + - macro: '{$VFS.FS.PUSED.MAX.WARN}' + value: '80' + description: 'The warning threshold of the filesystem utilization.' 
+ dashboards: + - uuid: 3d5c95268cef41468869be8d5e76a58e + name: 'Network interfaces' + pages: + - name: Overview + widgets: + - type: graphprototype + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: GRAPH_PROTOTYPE + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Interface {#IFNAME}: Network traffic' + - type: STRING + name: reference + value: AABBQ + - type: INTEGER + name: rows + value: '1' + - uuid: 14c0171e92da44478edc9d7df8b24f62 + name: 'System performance' + pages: + - widgets: + - type: graph + width: '36' + height: '5' + fields: + - type: GRAPH + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Linux: System load' + - type: STRING + name: reference + value: AABBR + - type: graph + 'y': '5' + width: '36' + height: '5' + fields: + - type: GRAPH + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Linux: Memory usage' + - type: STRING + name: reference + value: AABBT + - type: graphprototype + 'y': '10' + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: GRAPH_PROTOTYPE + name: graphid + value: + host: 'Linux Autoregistration' + name: '{#FSNAME}: Disk space usage' + - type: STRING + name: reference + value: AABBV + - type: INTEGER + name: rows + value: '1' + - type: graphprototype + 'y': '15' + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: STRING + name: reference + value: AABBW + - type: INTEGER + name: rows + value: '1' + - type: graphprototype + 'y': '20' + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: STRING + name: reference + value: AABBX + - type: INTEGER + name: rows + value: '1' + - type: graphprototype + 'y': '25' + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: STRING + name: reference + value: AABBY + - type: INTEGER + name: rows + value: '1' + - type: graphprototype + 'y': 
'30' + width: '72' + height: '5' + fields: + - type: INTEGER + name: columns + value: '1' + - type: GRAPH_PROTOTYPE + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Interface {#IFNAME}: Network traffic' + - type: STRING + name: reference + value: AABBZ + - type: INTEGER + name: rows + value: '1' + - type: graph + x: '36' + width: '36' + height: '5' + fields: + - type: GRAPH + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Linux: CPU usage' + - type: STRING + name: reference + value: AABBS + - type: graph + x: '36' + 'y': '5' + width: '36' + height: '5' + fields: + - type: GRAPH + name: graphid + value: + host: 'Linux Autoregistration' + name: 'Linux: Swap usage' + - type: STRING + name: reference + value: AABBU + valuemaps: + - uuid: f948b9b240c14884ba53c498f150faad + name: ifOperStatus + mappings: + - value: '0' + newvalue: unknown + - value: '1' + newvalue: notpresent + - value: '2' + newvalue: down + - value: '3' + newvalue: lowerlayerdown + - value: '4' + newvalue: testing + - value: '5' + newvalue: dormant + - value: '6' + newvalue: up + - uuid: 0e2fe9a0a451493283abade0aef11164 + name: 'Linux::Interface protocol types' + mappings: + - value: '0' + newvalue: 'from KA9Q: NET/ROM pseudo' + - value: '1' + newvalue: Ethernet + - value: '2' + newvalue: 'Experimental Ethernet' + - value: '3' + newvalue: 'AX.25 Level 2' + - value: '4' + newvalue: 'PROnet token ring' + - value: '5' + newvalue: Chaosnet + - value: '6' + newvalue: 'IEEE 802.2 Ethernet/TR/TB' + - value: '7' + newvalue: ARCnet + - value: '8' + newvalue: APPLEtalk + - value: '15' + newvalue: 'Frame Relay DLCI' + - value: '19' + newvalue: ATM + - value: '23' + newvalue: 'Metricom STRIP (new IANA id)' + - value: '24' + newvalue: 'IEEE 1394 IPv4 - RFC 2734' + - value: '27' + newvalue: EUI-64 + - value: '32' + newvalue: InfiniBand + - value: '256' + newvalue: ARPHRD_SLIP + - value: '257' + newvalue: ARPHRD_CSLIP + - value: '258' + newvalue: ARPHRD_SLIP6 + - value: '259' + 
newvalue: ARPHRD_CSLIP6 + - value: '260' + newvalue: 'Notional KISS type' + - value: '264' + newvalue: ARPHRD_ADAPT + - value: '270' + newvalue: ARPHRD_ROSE + - value: '271' + newvalue: 'CCITT X.25' + - value: '272' + newvalue: 'Boards with X.25 in firmware' + - value: '280' + newvalue: 'Controller Area Network' + - value: '512' + newvalue: ARPHRD_PPP + - value: '513' + newvalue: 'Cisco HDLC' + - value: '516' + newvalue: LAPB + - value: '517' + newvalue: 'Digital''s DDCMP protocol' + - value: '518' + newvalue: 'Raw HDLC' + - value: '519' + newvalue: 'Raw IP' + - value: '768' + newvalue: 'IPIP tunnel' + - value: '769' + newvalue: 'IP6IP6 tunnel' + - value: '770' + newvalue: 'Frame Relay Access Device' + - value: '771' + newvalue: 'SKIP vif' + - value: '772' + newvalue: 'Loopback device' + - value: '773' + newvalue: 'Localtalk device' + - value: '774' + newvalue: 'Fiber Distributed Data Interface' + - value: '775' + newvalue: 'AP1000 BIF' + - value: '776' + newvalue: 'sit0 device - IPv6-in-IPv4' + - value: '777' + newvalue: 'IP over DDP tunneller' + - value: '778' + newvalue: 'GRE over IP' + - value: '779' + newvalue: 'PIMSM register interface' + - value: '780' + newvalue: 'High Performance Parallel Interface' + - value: '781' + newvalue: 'Nexus 64Mbps Ash' + - value: '782' + newvalue: 'Acorn Econet' + - value: '783' + newvalue: Linux-IrDA + - value: '784' + newvalue: 'Point to point fibrechannel' + - value: '785' + newvalue: 'Fibrechannel arbitrated loop' + - value: '786' + newvalue: 'Fibrechannel public loop' + - value: '787' + newvalue: 'Fibrechannel fabric' + - value: '800' + newvalue: 'Magic type ident for TR' + - value: '801' + newvalue: 'IEEE 802.11' + - value: '802' + newvalue: 'IEEE 802.11 + Prism2 header' + - value: '803' + newvalue: 'IEEE 802.11 + radiotap header' + - value: '804' + newvalue: ARPHRD_IEEE802154 + - value: '805' + newvalue: 'IEEE 802.15.4 network monitor' + - value: '820' + newvalue: 'PhoNet media type' + - value: '821' + newvalue: 'PhoNet 
pipe header' + - value: '822' + newvalue: 'CAIF media type' + - value: '823' + newvalue: 'GRE over IPv6' + - value: '824' + newvalue: 'Netlink header' + - value: '825' + newvalue: 'IPv6 over LoWPAN' + - value: '826' + newvalue: 'Vsock monitor header' + - uuid: 962eedfc6b94434da11469002c5aade4 + name: zabbix.host.available + mappings: + - value: '0' + newvalue: 'not available' + - value: '1' + newvalue: available + - value: '2' + newvalue: unknown + - uuid: 345861762e9c4494b1bfac133cf7cedb + name: 'Zabbix agent ping status' + mappings: + - value: '1' + newvalue: Up + triggers: + - uuid: 5c0fdefe4e554e7eaf0887912e312fb9 + expression: 'last(/Linux Autoregistration/proc.num)/last(/Linux Autoregistration/kernel.maxproc)*100>80' + name: 'Linux: Getting closer to process limit' + event_name: 'Linux: Getting closer to process limit (over 80% used)' + opdata: '{ITEM.LASTVALUE1} active, {ITEM.LASTVALUE2} limit.' + priority: WARNING + tags: + - tag: scope + value: performance + graphs: + - uuid: 0a9148b2aa704d55a521cbd7649b5325 + name: 'Linux: CPU jumps' + graph_items: + - color: 199C0D + item: + host: 'Linux Autoregistration' + key: system.cpu.switches + - sortorder: '1' + color: F63100 + item: + host: 'Linux Autoregistration' + key: system.cpu.intr + - uuid: a27c5f141fa04a41a8f6764774bd5cf2 + name: 'Linux: CPU usage' + type: STACKED + ymin_type_1: FIXED + ymax_type_1: FIXED + graph_items: + - color: 199C0D + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,system]' + - sortorder: '1' + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,user]' + - sortorder: '2' + color: 00611C + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,nice]' + - sortorder: '3' + color: F7941D + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,iowait]' + - sortorder: '4' + color: FC6EA3 + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,steal]' + - sortorder: '5' + color: 6C59DC + item: + host: 'Linux 
Autoregistration' + key: 'system.cpu.util[,interrupt]' + - sortorder: '6' + color: C7A72D + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,softirq]' + - sortorder: '7' + color: BA2A5D + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,guest]' + - sortorder: '8' + color: F230E0 + item: + host: 'Linux Autoregistration' + key: 'system.cpu.util[,guest_nice]' + - uuid: f5f3e30199b64fb8adfc6f823ce9bb6b + name: 'Linux: CPU utilization' + ymin_type_1: FIXED + ymax_type_1: FIXED + graph_items: + - drawtype: GRADIENT_LINE + color: 199C0D + item: + host: 'Linux Autoregistration' + key: system.cpu.util + - uuid: cfdbe8775d2d462d9a78c68739f9e0c4 + name: 'Linux: Memory usage' + ymin_type_1: FIXED + graph_items: + - drawtype: BOLD_LINE + color: 199C0D + item: + host: 'Linux Autoregistration' + key: 'vm.memory.size[total]' + - sortorder: '1' + drawtype: GRADIENT_LINE + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'vm.memory.size[available]' + - uuid: 6343fb61837f4068a07b51e858160784 + name: 'Linux: Memory utilization' + ymin_type_1: FIXED + ymax_type_1: FIXED + graph_items: + - drawtype: GRADIENT_LINE + color: 199C0D + item: + host: 'Linux Autoregistration' + key: vm.memory.utilization + - uuid: 23e6534f2c1946b1b4840636e559c261 + name: 'Linux: Processes' + graph_items: + - color: 199C0D + item: + host: 'Linux Autoregistration' + key: proc.num + - sortorder: '1' + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'proc.num[,,run]' + - uuid: a52f6b2513f341d4b8af16d9f166f968 + name: 'Linux: Swap usage' + graph_items: + - color: 199C0D + item: + host: 'Linux Autoregistration' + key: 'system.swap.size[,free]' + - sortorder: '1' + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'system.swap.size[,total]' + - uuid: 4d3ef460e7b0490383e9fc7ab788dab0 + name: 'Linux: System load' + ymin_type_1: FIXED + graph_items: + - color: 199C0D + item: + host: 'Linux Autoregistration' + key: 'system.cpu.load[all,avg1]' + - 
sortorder: '1' + color: F63100 + item: + host: 'Linux Autoregistration' + key: 'system.cpu.load[all,avg5]' + - sortorder: '2' + color: 00611C + item: + host: 'Linux Autoregistration' + key: 'system.cpu.load[all,avg15]' + - sortorder: '3' + color: F7941D + yaxisside: RIGHT + item: + host: 'Linux Autoregistration' + key: system.cpu.num diff --git a/roles/zabbix/zabbix_server/files/templates/linux_hosts.yaml b/roles/zabbix/zabbix_server/files/templates/linux_hosts.yaml new file mode 100644 index 0000000000..9d8ad284e6 --- /dev/null +++ b/roles/zabbix/zabbix_server/files/templates/linux_hosts.yaml @@ -0,0 +1,354 @@ +zabbix_export: + version: '7.0' + template_groups: + - uuid: a333cbd6a3ad44baaa4eee4b0c0b1bec + name: Fedora + templates: + - uuid: 28d6d64b3a5041b7a5d2d166b26f25a8 + template: 'Linux Hosts' + name: 'Linux Hosts' + description: 'Builds upon "Linux Autoregistration" to enable the remaining triggers / prototypes for non-Koji hosts' + templates: + - name: 'Linux Autoregistration' + groups: + - name: Fedora + discovery_rules: + - uuid: 34e769e2da244b338e7d3b1126e6bcc1 + name: 'Block devices discovery' + type: ZABBIX_ACTIVE + key: vfs.dev.discovery + delay: 1h + filter: + evaltype: AND + conditions: + - macro: '{#DEVNAME}' + value: '{$VFS.DEV.DEVNAME.MATCHES}' + formulaid: A + - macro: '{#DEVNAME}' + value: '{$VFS.DEV.DEVNAME.NOT_MATCHES}' + operator: NOT_MATCHES_REGEX + formulaid: B + - macro: '{#DEVTYPE}' + value: disk + formulaid: C + lifetime: 30d + enabled_lifetime_type: DISABLE_NEVER + item_prototypes: + - uuid: 9a0448cf8a184d52a7872df410f25d6c + name: '{#DEVNAME}: Disk average queue size (avgqu-sz)' + type: DEPENDENT + key: 'vfs.dev.queue_size[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + description: 'The current average disk queue; the number of requests outstanding on the disk while the performance data is being collected.' 
+ preprocessing: + - type: JSONPATH + parameters: + - '$[10]' + - type: CHANGE_PER_SECOND + parameters: + - '' + - type: MULTIPLIER + parameters: + - '0.001' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 1b3559f0d90948f0a72c2fdfdc80930c + name: '{#DEVNAME}: Disk read request avg waiting time (r_await)' + type: CALCULATED + key: 'vfs.dev.read.await[{#DEVNAME}]' + history: 7d + value_type: FLOAT + units: '!ms' + params: '(last(//vfs.dev.read.time.rate[{#DEVNAME}])/(last(//vfs.dev.read.rate[{#DEVNAME}])+(last(//vfs.dev.read.rate[{#DEVNAME}])=0)))*1000*(last(//vfs.dev.read.rate[{#DEVNAME}]) > 0)' + description: 'This formula contains two Boolean expressions that evaluate to 1 or 0 in order to set the calculated metric to zero and to avoid the exception - division by zero.' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 3bb5f84b2e954c28843fa1fb3898c035 + name: '{#DEVNAME}: Disk read rate' + type: DEPENDENT + key: 'vfs.dev.read.rate[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + units: '!r/s' + description: 'r/s (read operations per second) - the number (after merges) of read requests completed per second for the device.' + preprocessing: + - type: JSONPATH + parameters: + - '$[0]' + - type: CHANGE_PER_SECOND + parameters: + - '' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: df02934521b54864b2538b764c5d549c + name: '{#DEVNAME}: Disk read time (rate)' + type: DEPENDENT + key: 'vfs.dev.read.time.rate[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + description: 'The rate of total read time counter; used in `r_await` calculation.' 
+ preprocessing: + - type: JSONPATH + parameters: + - '$[3]' + - type: CHANGE_PER_SECOND + parameters: + - '' + - type: MULTIPLIER + parameters: + - '0.001' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 72546bd5eefb4ac7a0b2992a25e5f0c6 + name: '{#DEVNAME}: Disk utilization' + type: DEPENDENT + key: 'vfs.dev.util[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + units: '%' + description: 'This item is the percentage of elapsed time during which the selected disk drive was busy while servicing read or write requests.' + preprocessing: + - type: JSONPATH + parameters: + - '$[9]' + - type: CHANGE_PER_SECOND + parameters: + - '' + - type: MULTIPLIER + parameters: + - '0.1' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 8422c37735774134996be62580e7bf10 + name: '{#DEVNAME}: Disk write request avg waiting time (w_await)' + type: CALCULATED + key: 'vfs.dev.write.await[{#DEVNAME}]' + history: 7d + value_type: FLOAT + units: '!ms' + params: '(last(//vfs.dev.write.time.rate[{#DEVNAME}])/(last(//vfs.dev.write.rate[{#DEVNAME}])+(last(//vfs.dev.write.rate[{#DEVNAME}])=0)))*1000*(last(//vfs.dev.write.rate[{#DEVNAME}]) > 0)' + description: 'This formula contains two Boolean expressions that evaluate to 1 or 0 in order to set the calculated metric to zero and to avoid the exception - division by zero.' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 4ba78909402d4bb8ab32f12c679ea3dc + name: '{#DEVNAME}: Disk write rate' + type: DEPENDENT + key: 'vfs.dev.write.rate[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + units: '!w/s' + description: 'w/s (write operations per second) - the number (after merges) of write requests completed per second for the device.' 
+ preprocessing: + - type: JSONPATH + parameters: + - '$[4]' + - type: CHANGE_PER_SECOND + parameters: + - '' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 7717dd9841004fa08b35b0e9f42bffae + name: '{#DEVNAME}: Disk write time (rate)' + type: DEPENDENT + key: 'vfs.dev.write.time.rate[{#DEVNAME}]' + delay: '0' + history: 7d + value_type: FLOAT + description: 'The rate of total write time counter; used in `w_await` calculation.' + preprocessing: + - type: JSONPATH + parameters: + - '$[7]' + - type: CHANGE_PER_SECOND + parameters: + - '' + - type: MULTIPLIER + parameters: + - '0.001' + master_item: + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + tags: + - tag: component + value: storage + - tag: disk + value: '{#DEVNAME}' + - uuid: 39877664726f4886aa88f3d1592bbcb2 + name: '{#DEVNAME}: Get stats' + type: ZABBIX_ACTIVE + key: 'vfs.file.contents[/sys/block/{#DEVNAME}/stat]' + history: '0' + value_type: TEXT + trends: '0' + description: 'The contents of get `/sys/block/{#DEVNAME}/stat` to get the disk statistics.' + preprocessing: + - type: JAVASCRIPT + parameters: + - 'return JSON.stringify(value.trim().split(/ +/));' + tags: + - tag: component + value: raw + trigger_prototypes: + - uuid: e7d0c8f816de481b8790709b24c44c81 + expression: 'min(/Linux Hosts/vfs.dev.read.await[{#DEVNAME}],15m) > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} or min(/Linux Hosts/vfs.dev.write.await[{#DEVNAME}],15m) > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"}' + name: '{#DEVNAME}: Disk read/write request responses are too high' + event_name: '{#DEVNAME}: Disk read/write request responses are too high (read > {$VFS.DEV.READ.AWAIT.WARN:"{#DEVNAME}"} ms for 15m or write > {$VFS.DEV.WRITE.AWAIT.WARN:"{#DEVNAME}"} ms for 15m)' + priority: WARNING + description: 'This trigger might indicate the disk {#DEVNAME} saturation.' 
+ manual_close: 'YES' + tags: + - tag: scope + value: performance + graph_prototypes: + - uuid: feca6a365b8d49d2a66ff4bfac089fc9 + name: '{#DEVNAME}: Disk average waiting time' + graph_items: + - color: 199C0D + item: + host: 'Linux Hosts' + key: 'vfs.dev.read.await[{#DEVNAME}]' + - sortorder: '1' + drawtype: GRADIENT_LINE + color: F63100 + item: + host: 'Linux Hosts' + key: 'vfs.dev.write.await[{#DEVNAME}]' + - uuid: b136583f822a4d48a52a17f4bb0d07d8 + name: '{#DEVNAME}: Disk read/write rates' + graph_items: + - color: 199C0D + item: + host: 'Linux Hosts' + key: 'vfs.dev.read.rate[{#DEVNAME}]' + - sortorder: '1' + drawtype: GRADIENT_LINE + color: F63100 + item: + host: 'Linux Hosts' + key: 'vfs.dev.write.rate[{#DEVNAME}]' + - uuid: 8863772fb82b49a891ea50cbec5cdd06 + name: '{#DEVNAME}: Disk utilization and queue' + graph_items: + - color: 199C0D + yaxisside: RIGHT + item: + host: 'Linux Hosts' + key: 'vfs.dev.queue_size[{#DEVNAME}]' + - sortorder: '1' + drawtype: GRADIENT_LINE + color: F63100 + item: + host: 'Linux Hosts' + key: 'vfs.dev.util[{#DEVNAME}]' + preprocessing: + - type: DISCARD_UNCHANGED_HEARTBEAT + parameters: + - 1h + triggers: + - uuid: cd709f79294341b4ad24213adc2cbfd5 + expression: 'min(/Linux Hosts/system.cpu.util,5m)>{$CPU.UTIL.CRIT}' + name: 'Linux: High CPU utilization' + event_name: 'Linux: High CPU utilization (over {$CPU.UTIL.CRIT}% for 5m)' + opdata: 'Current utilization: {ITEM.LASTVALUE1}' + priority: WARNING + description: 'The CPU utilization is too high. The system might be slow to respond.' 
+ dependencies: + - name: 'Linux: Load average is too high' + expression: | + min(/Linux Hosts/system.cpu.load[all,avg1],5m)/last(/Linux Hosts/system.cpu.num)>{$LOAD_AVG_PER_CPU.MAX.WARN} + and last(/Linux Hosts/system.cpu.load[all,avg5])>0 + and last(/Linux Hosts/system.cpu.load[all,avg15])>0 + tags: + - tag: scope + value: performance + - uuid: d3e1eaa726cc4ab8b2dcadae64a0fd55 + expression: 'min(/Linux Hosts/vm.memory.utilization,5m)>{$MEMORY.UTIL.MAX}' + name: 'Linux: High memory utilization' + event_name: 'Linux: High memory utilization (>{$MEMORY.UTIL.MAX}% for 5m)' + priority: AVERAGE + description: 'The system is running out of free memory.' + dependencies: + - name: 'Linux: Lack of available memory' + expression: 'max(/Linux Hosts/vm.memory.size[available],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux Hosts/vm.memory.size[total])>0' + tags: + - tag: scope + value: capacity + - tag: scope + value: performance + - uuid: 61ce552ec3774a01b89de3edce1d00ae + expression: 'max(/Linux Hosts/system.swap.size[,pfree],5m)<{$SWAP.PFREE.MIN.WARN} and last(/Linux Hosts/system.swap.size[,total])>0' + name: 'Linux: High swap space usage' + event_name: 'Linux: High swap space usage (less than {$SWAP.PFREE.MIN.WARN}% free)' + opdata: 'Free: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}' + priority: WARNING + description: 'If there is no swap configured, this trigger is ignored.' 
+ dependencies: + - name: 'Linux: High memory utilization' + expression: 'min(/Linux Hosts/vm.memory.utilization,5m)>{$MEMORY.UTIL.MAX}' + - name: 'Linux: Lack of available memory' + expression: 'max(/Linux Hosts/vm.memory.size[available],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux Hosts/vm.memory.size[total])>0' + tags: + - tag: scope + value: capacity + - uuid: 6e638060373b44398dd22b01564bc326 + expression: 'max(/Linux Hosts/vm.memory.size[available],5m)<{$MEMORY.AVAILABLE.MIN} and last(/Linux Hosts/vm.memory.size[total])>0' + name: 'Linux: Lack of available memory' + event_name: 'Linux: Lack of available memory (<{$MEMORY.AVAILABLE.MIN} of {ITEM.VALUE2})' + opdata: 'Available: {ITEM.LASTVALUE1}, total: {ITEM.LASTVALUE2}' + priority: AVERAGE + tags: + - tag: scope + value: capacity + - tag: scope + value: performance + - uuid: cebd3b42cd2042b8a76eac570ce70b4c + expression: | + min(/Linux Hosts/system.cpu.load[all,avg1],5m)/last(/Linux Hosts/system.cpu.num)>{$LOAD_AVG_PER_CPU.MAX.WARN} + and last(/Linux Hosts/system.cpu.load[all,avg5])>0 + and last(/Linux Hosts/system.cpu.load[all,avg15])>0 + name: 'Linux: Load average is too high' + event_name: 'Linux: Load average is too high (per CPU load over {$LOAD_AVG_PER_CPU.MAX.WARN} for 5m)' + opdata: 'Load averages(1m 5m 15m): ({ITEM.LASTVALUE1} {ITEM.LASTVALUE3} {ITEM.LASTVALUE4}), # of CPUs: {ITEM.LASTVALUE2}' + priority: AVERAGE + description: 'The load average per CPU is too high. The system may be slow to respond.' 
+      tags:
+        - tag: scope
+          value: capacity
+        - tag: scope
+          value: performance
diff --git a/roles/zabbix/zabbix_server/tasks/configure_api.yml b/roles/zabbix/zabbix_server/tasks/configure_api.yml
new file mode 100644
index 0000000000..6a60f1846f
--- /dev/null
+++ b/roles/zabbix/zabbix_server/tasks/configure_api.yml
@@ -0,0 +1,172 @@
+---
+# Configures the Zabbix server through its HTTP API: Matrix media type and bot
+# user, the alert action, the base templates and PSK autoregistration.
+#
+# Use a block so we can specify the connection vars once
+- name: API Block
+  vars:
+    # NOTE(review): this is the same hostname expression as ansible_host below,
+    # not a dedicated secret - confirm which variable should hold the API key.
+    ansible_zabbix_auth_key: "{{ (env == 'staging') | ternary(zabbix_stg_hostname, zabbix_hostname) }}"
+    ansible_network_os: community.zabbix.zabbix
+    ansible_connection: httpapi
+    ansible_httpapi_port: 443
+    ansible_httpapi_use_ssl: true
+    # NOTE(review): certificate validation is disabled for the API connection.
+    ansible_httpapi_validate_certs: false
+    ansible_host: "{{ (env == 'staging') | ternary(zabbix_stg_hostname, zabbix_hostname) }}"
+    ansible_zabbix_url_path: ""  # If Zabbix WebUI runs on non-default (zabbix) path, e.g. http://<FQDN>/zabbixeu
+  block:
+    - name: Create a webhook mediatype
+      community.zabbix.zabbix_mediatype:
+        name: Matrix
+        type: "webhook"
+        description: "Matrix webhook - See https://github.com/jooola/zabbix-matrix-webhook#readme"
+        webhook_script: "{{ lookup('file', './matrix_mediatype.js') }}"
+        webhook_params:
+          - name: alert_message
+            value: "{ALERT.MESSAGE}"
+          - name: alert_subject
+            value: "{ALERT.SUBJECT}"
+          - name: enable_colors
+            value: "true"
+          - name: enable_icons
+            value: "true"
+          - name: event_is_problem
+            value: "{EVENT.VALUE}"
+          - name: event_is_update
+            value: "{EVENT.UPDATE.STATUS}"
+          - name: event_severity
+            value: "{EVENT.NSEVERITY}"
+          - name: event_url
+            value: "https://{{ (env == 'staging') | ternary(zabbix_stg_hostname, zabbix_hostname) }}/tr_events.php?triggerid={TRIGGER.ID}&eventid={EVENT.ID}"
+          - name: http_proxy
+            value: ""
+          - name: matrix_room
+            value: "{ALERT.SENDTO}"
+          - name: matrix_token
+            value: "{{ (env == 'staging') | ternary(zabbix_stg_matrix_token, zabbix_matrix_token) }}"
+          - name: matrix_url
+            value: "https://fedora.ems.host"
+        message_templates:
+          - subject: "{EVENT.NAME} ({EVENT.ID})"
+            body: "Severity: {EVENT.SEVERITY} started at {EVENT.DATE} {EVENT.TIME} on {HOST.NAME}"
+            eventsource: triggers
+            recovery: operations
+          - subject: "{EVENT.NAME} [{EVENT.DURATION}] ({EVENT.ID})"
+            body: "Severity: {EVENT.SEVERITY} resolved at {EVENT.RECOVERY.DATE} {EVENT.RECOVERY.TIME} on {HOST.NAME}"
+            eventsource: triggers
+            recovery: recovery_operations
+          - subject: "{EVENT.NAME} ({EVENT.AGE})"
+            body: |
+              {USER.FULLNAME} {EVENT.UPDATE.ACTION} problem at {EVENT.UPDATE.DATE} {EVENT.UPDATE.TIME}
+              {EVENT.UPDATE.MESSAGE}
+
+              Current problem status: {EVENT.STATUS}
+              Age: {EVENT.AGE}
+              Acknowledged: {EVENT.ACK.STATUS}
+            eventsource: triggers
+            recovery: update_operations
+      tags:
+        - zabbix_configuration
+        - zabbix_triggers
+
+    - name: Create a new Zabbix user for Matrix triggers
+      community.zabbix.zabbix_user:
+        username: matrix-bot
+        name: Matrix
+        surname: Bot
+        usrgrps:
+          - Zabbix administrators
+        passwd: "{{ (env == 'staging') | ternary(zabbix_stg_botuser_pwd, zabbix_botuser_pwd) }}"
+        user_medias:
+          - mediatype: Matrix
+            sendto: "{{ (env == 'staging') | ternary(zabbix_stg_matrix_roomid, zabbix_matrix_roomid) }}"
+            period: 1-7,00:00-24:00
+            severity:
+              not_classified: true
+              information: true
+              warning: true
+              average: true
+              high: true
+              disaster: true
+            active: true
+        state: present
+      tags:
+        - zabbix_configuration
+        - zabbix_users
+
+    - name: Send alerts to Matrix
+      community.zabbix.zabbix_action:
+        name: "Send alerts to Matrix"
+        event_source: "trigger"
+        state: present
+        status: enabled
+        esc_period: 1m
+        conditions:
+          - type: "trigger_severity"
+            operator: ">="
+            value: "Information"
+        operations:
+          - type: send_message
+            media_type: "Matrix"
+            send_to_users:
+              - "matrix-bot"
+        recovery_operations:
+          - type: send_message
+            media_type: "Matrix"
+            send_to_users:
+              - "matrix-bot"
+      tags:
+        - zabbix_configuration
+        - zabbix_users
+        - zabbix_triggers
+
+    # Templates seem to always report a change :/
+    - name: Import Base Auto-registration template
+      community.zabbix.zabbix_template:
+        template_yaml: "{{ lookup('file', 'templates/linux_autoregister.yaml') }}"
+        state: present
+      tags:
+        - zabbix_configuration
+        - zabbix_templates
+
+    # Templates seem to always report a change :/
+    - name: Import dependant Linux Host template
+      community.zabbix.zabbix_template:
+        template_yaml: "{{ lookup('file', 'templates/linux_hosts.yaml') }}"
+        state: present
+      tags:
+        - zabbix_configuration
+        - zabbix_templates
+
+    # PSK config: this can't be checked so it will always report a change
+    # The fact carries the same tag as its consumer so tag-filtered runs still define psk_file.
+    - name: Construct PSK filename
+      ansible.builtin.set_fact:
+        psk_file: "{{ private }}/files/zabbix/fedora{{ env_suffix }}.psk"
+      tags:
+        - zabbix_configuration
+    - name: Configure autoregistration via PSK
+      community.zabbix.zabbix_autoregister:
+        tls_accept:
+          - tls_with_psk
+        tls_psk_identity: "{{ zabbix_tls_psk_identity }}"
+        tls_psk: "{{ lookup('ansible.builtin.file', psk_file) }}"
+      tags:
+        - zabbix_configuration
+
+    - name: Configure autoregistration action
+      community.zabbix.zabbix_action:
+        name: "Add host to base template"
+        event_source: "auto_registration"
+        state: present
+        status: enabled
+        esc_period: 1m
+        operations:
+          - type: add_host
+          - type: link_to_template
+            templates:
+              - Linux Autoregistration
+      tags:
+        - zabbix_configuration
diff --git a/roles/zabbix/zabbix_server/tasks/main.yml b/roles/zabbix/zabbix_server/tasks/main.yml
index babbc057c5..b650290cfb 100644
--- a/roles/zabbix/zabbix_server/tasks/main.yml
+++ b/roles/zabbix/zabbix_server/tasks/main.yml
@@ -4,5 +4,12 @@
   tags:
     - zabbix-configuration
 
+# Static import: the tag and `when` below are inherited by every imported task.
+- name: Configure Zabbix via api
+  ansible.builtin.import_tasks: configure_api.yml
+  tags:
+    - zabbix-configuration
+  when: env == "staging"
+
 # - include_tasks: plugins.yml
 - include_tasks: start_services.yml