Blame - heka/alarm/openstack_compute.yml - salt-models/reclass-system

blob: 81b8be95cf0dc75488c243a01cd6e7b214d0e58a [file] [log] [blame]

Ondrej Smola	03ff34e	2016-12-01 01:30:33 +0100	[diff] [blame]	1	parameters:
				2	heka:
				3	metric_collector:
				4	trigger:
				5	# Override the linux_system_cpu_critical and linux_system_cpu_warning
				6	# triggers to use specific rules on compute nodes
				7	linux_system_cpu_critical:
				8	description: 'The CPU usage is too high (compute node)'
				9	severity: critical
				10	rules:
				11	- metric: cpu_wait
				12	relational_operator: '>='
				13	threshold: 30
				14	window: 120
				15	periods: 0
				16	function: avg
				17	linux_system_cpu_warning:
				18	description: 'The CPU usage is high (compute node)'
				19	severity: 'warning'
				20	enabled: 'true'
				21	rules:
				22	- metric: cpu_wait
				23	relational_operator: '>='
				24	threshold: 20
				25	window: 120
				26	periods: 0
				27	function: avg
				28	alarm:
				29	# Tag all the system alarm metrics with "node_role: compute". This
				30	# makes it possible to create an alarm cluster for compute nodes.
				31	linux_system_cpu:
				32	alerting: enabled
				33	triggers:
				34	- linux_system_cpu_critical
				35	- linux_system_cpu_warning
				36	dimension:
				37	node_role: compute
				38	linux_system_swap:
				39	alerting: enabled
				40	triggers:
				41	- linux_system_swap_usage_critical
				42	- linux_system_swap_activity_warning
				43	- linux_system_swap_usage_warning
				44	dimension:
				45	node_role: compute
				46	linux_system_root_fs:
				47	alerting: enabled
				48	triggers:
				49	- linux_system_root_fs_critical
				50	- linux_system_root_fs_warning
				51	dimension:
				52	node_role: compute
				53	linux_system_network_rx:
				54	alerting: enabled
				55	triggers:
				56	- linux_system_network_critical_dropped_rx
				57	- linux_system_network_warning_dropped_rx
				58	dimension:
				59	node_role: compute
				60	linux_system_network_tx:
				61	alerting: enabled
				62	triggers:
				63	- linux_system_network_critical_dropped_tx
				64	- linux_system_network_warning_dropped_tx
				65	dimension:
				66	node_role: compute
				67	linux_system_hdd_errors:
				68	alerting: enabled_with_notification
				69	triggers:
				70	- linux_system_hdd_errors_critical
				71	dimension:
				72	node_role: compute
				73	aggregator:
				74	alarm_cluster:
				75	compute_nodes:
Éric Lemoine	b308691	2016-12-14 15:15:39 +0000	[diff] [blame]	76	policy: status_of_members
Ondrej Smola	03ff34e	2016-12-01 01:30:33 +0100	[diff] [blame]	77	alerting: enabled_with_notification
				78	group_by: hostname
				79	match:
				80	node_role: compute
				81	members:
				82	- linux_system_cpu
				83	- linux_system_swap
				84	- linux_system_root_fs
				85	- linux_system_network_rx
				86	- linux_system_network_tx
				87	- linux_system_hdd_errors
				88	dimension:
				89	cluster_name: compute
				90	nagios_host: 01-node-clusters