{%- from "linux/map.jinja" import monitoring with context %}
# Linux host alarm definitions for the metric collector.
#
# `trigger` holds named threshold checks; each one carries a description,
# a severity and a list of rules (metric, comparison operator, threshold,
# window, periods and aggregation function).
# `alarm` groups triggers by name and sets the alerting mode for the group.
#
# The bond status trigger and alarm are rendered only when
# monitoring.bond_status.interfaces is defined and is a list.
metric_collector:
  trigger:
    # CPU
    linux_system_cpu_critical:
      description: 'The CPU usage is too high.'
      severity: critical
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 35
          window: 120
          periods: 0
          function: avg
        - metric: cpu_idle
          relational_operator: '<='
          threshold: 5
          window: 120
          # Made explicit to match the sibling rules
          # (assumes 0 is also the collector default; confirm).
          periods: 0
          function: avg
    linux_system_cpu_warning:
      description: 'The CPU wait times are high.'
      severity: warning
      rules:
        - metric: cpu_wait
          relational_operator: '>='
          threshold: 15
          window: 120
          periods: 0
          function: avg

    # Swap
    linux_system_swap_usage_critical:
      description: 'There is no more swap free space'
      severity: critical
      rules:
        - metric: swap_free
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: max
    linux_system_swap_activity_warning:
      description: 'The swap activity is high'
      severity: warning
      rules:
        - metric: swap_io_in
          relational_operator: '>='
          # 1024 * 1024, i.e. 1 MiB/s assuming the metric is in bytes/s
          threshold: 1048576
          window: 120
          periods: 0
          function: avg
        - metric: swap_io_out
          relational_operator: '>='
          # 1024 * 1024, i.e. 1 MiB/s assuming the metric is in bytes/s
          threshold: 1048576
          window: 120
          periods: 0
          function: avg
    linux_system_swap_usage_warning:
      description: 'The swap free space is low'
      severity: warning
      rules:
        - metric: swap_percent_used
          relational_operator: '>='
          # NOTE(review): 0.8 reads as a 0..1 ratio although the metric is
          # named "percent"; confirm the scale the collector reports.
          threshold: 0.8
          window: 60
          periods: 0
          function: avg

    # Root filesystem (rules are restricted to the fs '/' field)
    linux_system_root_fs_warning:
      description: "The root filesystem's free space is low"
      severity: warning
      rules:
        - metric: fs_space_percent_free
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 10
          window: 60
          periods: 0
          function: min
    linux_system_root_fs_critical:
      description: "The root filesystem's free space is too low"
      severity: critical
      rules:
        - metric: fs_space_percent_free
          field:
            fs: '/'
          relational_operator: '<'
          threshold: 5
          window: 60
          periods: 0
          function: min

    # Network: dropped packets, received (rx) and transmitted (tx)
    linux_system_network_warning_dropped_rx:
      description: 'Some received packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_rx:
      description: 'Too many received packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_rx
          relational_operator: '>'
          threshold: 1000
          window: 60
          periods: 0
          function: avg
    linux_system_network_warning_dropped_tx:
      description: 'Some transmitted packets have been dropped'
      severity: warning
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 100
          window: 60
          periods: 0
          function: avg
    linux_system_network_critical_dropped_tx:
      description: 'Too many transmitted packets have been dropped'
      severity: critical
      rules:
        - metric: if_dropped_tx
          relational_operator: '>'
          threshold: 1000
          window: 60
          # Made explicit to match the sibling rules
          # (assumes 0 is also the collector default; confirm).
          periods: 0
          function: avg

    # Hard drive errors, evaluated per device (group_by)
    linux_system_hdd_errors_critical:
      description: 'Errors on hard drive(s) have been detected'
      severity: critical
      no_data_policy: okay
      rules:
        - metric: hdd_errors_rate
          group_by: [device]
          relational_operator: '>'
          threshold: 0
          window: 60
          periods: 0
          function: max
{%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}

    # Bonded interfaces: any link reported down
    linux_bond_status_critical:
      description: Bond members are down.
      severity: critical
      rules:
        - metric: bond_status_links_down
          relational_operator: '>'
          threshold: 0
          window: 120
          periods: 0
          function: last
{%- endif %}

  # Each alarm lists the triggers (defined above) that it evaluates.
  alarm:
    linux_system_cpu:
      alerting: enabled
      triggers:
        - linux_system_cpu_warning
        - linux_system_cpu_critical
    linux_system_swap:
      alerting: enabled
      triggers:
        - linux_system_swap_usage_critical
        - linux_system_swap_activity_warning
        - linux_system_swap_usage_warning
    linux_system_root_fs:
      alerting: enabled
      triggers:
        - linux_system_root_fs_critical
        - linux_system_root_fs_warning
    linux_system_network_rx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_rx
        - linux_system_network_warning_dropped_rx
    linux_system_network_tx:
      alerting: enabled
      triggers:
        - linux_system_network_critical_dropped_tx
        - linux_system_network_warning_dropped_tx
    # The only alarm using the enabled_with_notification alerting mode.
    linux_system_hdd_errors:
      alerting: enabled_with_notification
      triggers:
        - linux_system_hdd_errors_critical
{%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces is list %}
    linux_bond_status:
      alerting: enabled
      triggers:
        - linux_bond_status_critical
{%- endif %}