Add alert and recording rules for OVS arping checker

Change-Id: Ie991004aeaa2fbd1ac2c1d2c797594c995e2a9fb
Related-PROD: PROD-27908
diff --git a/telegraf/meta/prometheus.yml b/telegraf/meta/prometheus.yml
index c93b7f3..87ba22b 100644
--- a/telegraf/meta/prometheus.yml
+++ b/telegraf/meta/prometheus.yml
@@ -27,6 +27,34 @@
         summary: "The {{ $labels.host }} node is down"
         description: "The {{ $labels.host }} node is unreachable at {{ $labels.url }}, the Telegraf and Fluentd targets on the {{ $labels.host }} node are down."
 {%- endraw %}
+{%- if pillar.neutron is defined %}
+  {%- if pillar.neutron.get('gateway', {}).get('enabled', False) == True or (pillar.neutron.get('compute',{}).get('enabled', False) == True and pillar.neutron.get('compute',{}).get('dhcp_agent_enabled', False) == True) %}
+    OVSInstanceArpingCheckDown:  # alert on the arping checker itself; emitted only under the neutron condition above
+      if: instance_arping_check_up == 0  # fires while the metric equals 0
+      for: 2m  # keep in sync with the "2 minutes" wording in the description
+      labels:
+        severity: major
+        service: ovs
+      annotations:
+        summary: "The OVS instance arping check is down"
+{%- raw %}
+        description: "The OVS instance arping check on the {{ $labels.host }} node is down for 2 minutes."
+{%- endraw %}
+  recording:  # 2-space indent: sibling of the section holding the alert above, same neutron condition
+    instance_id:instance_arping_success:  # mean of instance_arping_success per value of the id label
+      query: >-
+        avg(instance_arping_success) by (id)
+    instance_id:instance_arping_success:avg10m:for10m:  # 10m mean, kept only for ids present now and 10m ago
+      query: >-
+        avg_over_time(instance_id:instance_arping_success[10m]) and instance_id:instance_arping_success and instance_id:instance_arping_success offset 10m
+    total:instance_id:instance_arping_success:avg10m:for10m:  # number of series from the avg10m:for10m rule
+      query: >-
+        count(instance_id:instance_arping_success:avg10m:for10m)
+    total:instance_id:instance_arping_success:avg10m:for10m:eq0:  # how many of those series equal 0
+      query: >-  # NOTE(review): count() of an empty vector returns no sample, not 0 - confirm consumers cope
+        count(instance_id:instance_arping_success:avg10m:for10m == 0)
+  {%- endif %}
+{%- endif %}
 {%- if address is defined %}
   target:
     static: