Add Telegraf gather errors alert

Change-Id: I6b3b7c86a21a83f1cc44991a235f063e27ae747c
Related-PROD: PROD-30675
diff --git a/telegraf/meta/prometheus.yml b/telegraf/meta/prometheus.yml
index 46d7fe3..258cd81 100644
--- a/telegraf/meta/prometheus.yml
+++ b/telegraf/meta/prometheus.yml
@@ -27,6 +27,17 @@
         summary: "The {{ $labels.host }} node is down"
         description: "The {{ $labels.host }} node is unreachable at {{ $labels.url }}, the Telegraf and Fluentd targets on the {{ $labels.host }} node are down."
 {%- endraw %}
+    TelegrafGatherErrors:  # fires when the 10m rate of internal_agent_gather_errors is above zero
+      if: >-
+        rate(internal_agent_gather_errors[10m]) > 0
+      labels:  # static labels attached to this alert
+        severity: major
+        service: telegraf
+      annotations:  # description sits in a Jinja raw block so its $labels placeholder is emitted verbatim
+        summary: "Telegraf failed to gather metrics"  # NOTE(review): placeholder presumably expanded by Prometheus - confirm
+{%- raw %}
+        description: "Telegraf has gathering errors on the {{ $labels.host }} node for the last 10 minutes."
+{%- endraw %}
 {%- if pillar.neutron is defined %}
   {%- if pillar.neutron.get('gateway', {}).get('enabled', False) == True or (pillar.neutron.get('compute',{}).get('enabled', False) == True and pillar.neutron.get('compute',{}).get('dhcp_agent_enabled', False) == True) %}
     OVSInstanceArpingCheckDown: