Add alert and recording rules for OVS arping checker

Change-Id: Ie991004aeaa2fbd1ac2c1d2c797594c995e2a9fb
Related-PROD: PROD-27908

commit: 58000eff4b8ef22a3b3512c3fe0a92958485cd31 [log] [tgz]
author: Ildar Svetlov <isvetlov@mirantis.com> Mon Apr 22 14:44:35 2019 +0400
committer: Ildar Svetlov <isvetlov@mirantis.com> Tue Apr 23 13:51:11 2019 +0400
tree: 2da3457484b652b0ec2de3ca886e4af6dc04a1b5
parent: 03db7241b15f78ca1e6cca17ff927f30437390d5 [diff]
diff --git a/telegraf/meta/prometheus.yml b/telegraf/meta/prometheus.yml
index c93b7f3..87ba22b 100644
--- a/telegraf/meta/prometheus.yml
+++ b/telegraf/meta/prometheus.yml

@@ -27,6 +27,34 @@
         summary: "The {{ $labels.host }} node is down"
         description: "The {{ $labels.host }} node is unreachable at {{ $labels.url }}, the Telegraf and Fluentd targets on the {{ $labels.host }} node are down."
 {%- endraw %}
+{%- if pillar.neutron is defined %}
+  {%- if pillar.neutron.get('gateway', {}).get('enabled', False) == True or (pillar.neutron.get('compute',{}).get('enabled', False) == True and pillar.neutron.get('compute',{}).get('dhcp_agent_enabled', False) == True) %}
+    OVSInstanceArpingCheckDown:
+      if: instance_arping_check_up == 0
+      for: 2m
+      labels:
+        severity: major
+        service: ovs
+      annotations:
+        summary: "The OVS instance arping check is down"
+{%- raw %}
+        description: "The OVS instance arping check on the {{ $labels.host }} node is down for 2 minutes."
+{%- endraw %}
+  recording:
+    instance_id:instance_arping_success:
+      query: >-
+        avg(instance_arping_success) by (id)
+    instance_id:instance_arping_success:avg10m:for10m:
+      query: >-
+        avg_over_time(instance_id:instance_arping_success[10m]) and instance_id:instance_arping_success and instance_id:instance_arping_success offset 10m
+    total:instance_id:instance_arping_success:avg10m:for10m:
+      query: >-
+        count(instance_id:instance_arping_success:avg10m:for10m)
+    total:instance_id:instance_arping_success:avg10m:for10m:eq0:
+      query: >-
+        count(instance_id:instance_arping_success:avg10m:for10m == 0)
+  {%- endif %}
+{%- endif %}
 {%- if address is defined %}
   target:
     static:
commit	58000eff4b8ef22a3b3512c3fe0a92958485cd31	[log] [tgz]
author	Ildar Svetlov <isvetlov@mirantis.com>	Mon Apr 22 14:44:35 2019 +0400
committer	Ildar Svetlov <isvetlov@mirantis.com>	Tue Apr 23 13:51:11 2019 +0400
tree	2da3457484b652b0ec2de3ca886e4af6dc04a1b5
parent	03db7241b15f78ca1e6cca17ff927f30437390d5 [diff]