Add triggers and alarms for Neutron agents

This patch adds monitoring of the neutron-data cluster in case Neutron
uses OVS instead of OpenContrail.
It adds new triggers for the l3, dhcp, metadata and openvswitch agents to
detect whether some agents are down while at least two are still up, only
one agent is still up, or none are up. It also adds a new AFD based on
these triggers and its corresponding GSE on the aggregator.

Change-Id: I71a4f87b66d4ef1c44efd394c2369aceed90098e
diff --git a/neutron/meta/heka.yml b/neutron/meta/heka.yml
index 33e0ec6..6778821 100644
--- a/neutron/meta/heka.yml
+++ b/neutron/meta/heka.yml
@@ -1,3 +1,10 @@
+{%- from "neutron/map.jinja" import server with context %}
+{%- if server.backend is defined and server.backend.engine == "ml2" %}{# guard: a server without a backend yields the empty agent list instead of a render error #}
+{% set neutron_agents = ('l3', 'dhcp', 'metadata', 'openvswitch') %}
+{%- else %}
+{% set neutron_agents = () %}
+{%- endif %}
+
 log_collector:
   decoder:
     neutron:
@@ -79,6 +86,67 @@
         periods: 0
         function: last
     {%- endif %}
+    {%- for agent in neutron_agents %}
+    neutron_{{ agent }}_two_up:  # warning-level trigger; both rules below must hold
+      description: 'Some Neutron {{ agent }} agents are down'
+      severity: warning
+      logical_operator: and
+      rules:
+      - metric: openstack_neutron_agents  # rule 1: last 'up' value >= 2 (presumably an agent count; TODO confirm)
+        field:
+          service: {{ agent }}
+          state: up
+        relational_operator: '>='
+        threshold: 2
+        window: 60
+        periods: 0
+        function: last
+      - metric: openstack_neutron_agents  # rule 2: last 'down' value > 0
+        field:
+          service: {{ agent }}
+          state: down  # NOTE(review): the one_up trigger also matches 'disabled' here; confirm the difference is intended
+        relational_operator: '>'
+        threshold: 0
+        window: 60
+        periods: 0
+        function: last
+    neutron_{{ agent }}_one_up:  # critical-level trigger; both rules below must hold
+      description: 'Only one Neutron {{ agent }} agent is up'
+      severity: critical
+      logical_operator: and
+      rules:
+      - metric: openstack_neutron_agents  # rule 1: last 'up' value is exactly 1
+        field:
+          service: {{ agent }}
+          state: up
+        relational_operator: '=='
+        threshold: 1
+        window: 60
+        periods: 0
+        function: last
+      - metric: openstack_neutron_agents  # rule 2: last value for the matched states is > 0
+        field:
+          service: {{ agent }}
+          state: '== down || == disabled'  # looks like a match expression (down or disabled), not a literal state; TODO confirm syntax
+        relational_operator: '>'
+        threshold: 0
+        window: 60
+        periods: 0
+        function: last
+    neutron_{{ agent }}_zero_up:  # severity 'down'; single rule, so no logical_operator is set
+      description: 'All Neutron {{ agent }} agents are down or disabled'
+      severity: down
+      rules:
+      - metric: openstack_neutron_agents  # only checks that the last 'up' value is 0; down/disabled values are not inspected
+        field:
+          service: {{ agent }}
+          state: up
+        relational_operator: '=='
+        threshold: 0
+        window: 60
+        periods: 0
+        function: last
+    {%- endfor %}
   alarm:
     {%- if pillar.neutron.server is defined %}
     neutron_api_check:
@@ -87,6 +155,16 @@
       dimension:
         service: neutron-api-check
     {%- endif %}
+    {%- for agent in neutron_agents %}
+    neutron_{{ agent }}:  # one alarm per agent type, built from that agent's three triggers
+      alerting: enabled
+      triggers:  # listed from highest severity (down) to lowest (warning)
+      - neutron_{{ agent }}_zero_up
+      - neutron_{{ agent }}_one_up
+      - neutron_{{ agent }}_two_up
+      dimension:
+        service: neutron-{{ agent }}  # the aggregator cluster of the same name matches on this service value
+    {%- endfor %}
 aggregator:
   alarm_cluster:
     neutron_logs:
@@ -133,3 +211,34 @@
       dimension:
         cluster_name: neutron-control
         nagios_host: 00-top-clusters
+    {#- One cluster per agent type: it matches service neutron-<agent> and is
+        itself reported under service neutron-data. #}
+    {%- for agent in neutron_agents %}
+    neutron_{{ agent }}:
+      policy: highest_severity
+      alerting: enabled
+      match:
+        service: neutron-{{ agent }}
+      members:
+      - neutron_{{ agent }}
+      dimension:
+        service: neutron-data
+        nagios_host: 01-service-clusters
+    {%- endfor %}
+    {#- Declare the top-level cluster only when agents are monitored; with an
+        empty agent list "members:" would otherwise render with no entries. #}
+    {%- if neutron_agents %}
+    neutron_data:
+      policy: highest_severity
+      alerting: enabled_with_notification
+      match:
+        service: neutron-data
+      members:
+    {%- for agent in neutron_agents %}
+      - neutron_{{ agent }}
+    {%- endfor %}
+      dimension:
+        cluster_name: neutron-data
+        nagios_host: 00-top-clusters
+    {%- endif %}
+