Add alarms and alarm clusters

Change-Id: I815e7c4973093ac3a7b9307700fb5e372c639aba
diff --git a/nova/files/grafana_influxdb.json b/nova/files/grafana_influxdb.json
index 5f9befd..9500f4b 100644
--- a/nova/files/grafana_influxdb.json
+++ b/nova/files/grafana_influxdb.json
@@ -115,7 +115,7 @@
                 {
                   "key": "cluster_name",
                   "operator": "=",
-                  "value": "nova-control-plane"
+                  "value": "nova-control"
                 }
               ]
             }
@@ -244,7 +244,7 @@
                 {
                   "key": "cluster_name",
                   "operator": "=",
-                  "value": "nova-data-plane"
+                  "value": "nova-compute"
                 }
               ]
             }
diff --git a/nova/meta/heka.yml b/nova/meta/heka.yml
index 6c317a1..de1a869 100644
--- a/nova/meta/heka.yml
+++ b/nova/meta/heka.yml
@@ -33,3 +33,292 @@
       decoder: "libvirt_decoder"
       splitter: "TokenSplitter"
     {%- endif %}
+metric_collector:
+  trigger:
+    {%- if pillar.nova.compute is defined %}
+    nova_fs_warning:  # warning tier: min(fs_space_percent_free) for /var/lib/nova < 10
+      description: "The filesystem's free space is low (compute node)"
+      severity: warning
+      rules:
+      - metric: fs_space_percent_free
+        field:
+          fs: '/var/lib/nova'
+        relational_operator: '<'
+        threshold: 10  # compared against fs_space_percent_free, i.e. a percentage
+        window: 60  # presumably seconds - confirm against the collector's alarm schema
+        periods: 0
+        function: min
+    nova_fs_critical:  # critical tier: same rule as nova_fs_warning with a lower threshold
+      description: "The filesystem's free space is too low (compute node)"
+      severity: critical
+      rules:
+      - metric: fs_space_percent_free
+        field:
+          fs: '/var/lib/nova'
+        relational_operator: '<'
+        threshold: 5  # half of the warning tier's threshold
+        window: 60
+        periods: 0
+        function: min
+    {%- endif %}
+    nova_logs_error:  # not role-gated: both nova_logs and nova_logs_compute alarms use it
+      description: 'Too many errors have been detected in Nova logs'
+      severity: warning
+      no_data_policy: okay  # the only trigger in this file that sets no_data_policy
+      rules:
+      - metric: log_messages
+        field:
+          service: nova
+          level: error
+        relational_operator: '>'
+        threshold: 0.1  # unit not visible here; presumably a message rate - TODO confirm
+        window: 70  # differs from the 60 used by every other rule in this file
+        periods: 0
+        function: max
+    {%- if pillar.nova.controller is defined %}
+    nova_api_local_endpoint:  # down when last(openstack_check_local_api) for nova-api == 0
+      description: 'Nova API is locally down'
+      severity: down
+      rules:
+      - metric: openstack_check_local_api
+        field:
+          service: nova-api
+        relational_operator: '=='
+        threshold: 0  # presumably 0 means the check failed - confirm against the check's output
+        window: 60
+        periods: 0
+        function: last
+    {%- endif %}
+  alarm:
+    {%- if pillar.nova.compute is defined %}
+    nova_fs:  # combines both /var/lib/nova free-space triggers
+      alerting: enabled
+      triggers:
+      - nova_fs_critical
+      - nova_fs_warning
+      dimension:
+        service: nova-fs  # value the aggregator's nova_fs cluster matches on
+    nova_logs_compute:  # compute-role alarm built on the shared nova_logs_error trigger
+      alerting: enabled
+      triggers:
+      - nova_logs_error
+      dimension:
+        service: nova-logs-compute  # value the aggregator's nova_logs_compute cluster matches on
+    {%- endif %}
+    {%- if pillar.nova.controller is defined %}
+    nova_logs:  # controller-role alarm built on the shared nova_logs_error trigger
+      alerting: enabled
+      triggers:
+      - nova_logs_error
+      dimension:
+        service: nova-logs  # value the aggregator's nova_logs cluster matches on
+    nova_api_endpoint:  # wraps the local nova-api endpoint check
+      alerting: enabled
+      triggers:
+      - nova_api_local_endpoint
+      dimension:
+        service: nova-api-endpoint  # value the aggregator's nova_api_endpoint cluster matches on
+    {%- endif %}
+remote_collector:
+  trigger:
+    {%- if pillar.nova.controller is defined %}
+    nova_api_check_failed:  # down when last(openstack_check_api) for nova-api == 0
+      description: 'Endpoint check for nova-api is failed'
+      severity: down
+      rules:
+      - metric: openstack_check_api
+        field:
+          service: nova-api
+        relational_operator: '=='
+        threshold: 0  # presumably 0 means the check failed - confirm against the check's output
+        window: 60
+        periods: 0
+        function: last
+    {%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
+    nova_{{ nova_service }}_one_down:  # warning tier: more than zero instances report state=down
+      description: 'At least one Nova {{ nova_service }} is down'
+      severity: warning
+      rules:
+      - metric: openstack_nova_services
+        field:
+          service: {{ nova_service }}
+          state: down
+        relational_operator: '>'
+        threshold: 0
+        window: 60
+        periods: 0
+        function: last
+    nova_{{ nova_service }}_majority_down:  # critical tier: state=up value at or below 50
+      description: 'Majority of Nova {{ nova_service }}s are down'
+      severity: critical
+      rules:
+      - metric: openstack_nova_services
+        field:
+          service: {{ nova_service }}
+          state: up
+        relational_operator: '<='
+        threshold: 50  # NOTE(review): reads like a percentage, yet sibling rules compare this metric to 0 as a count - confirm
+        window: 60
+        periods: 0
+        function: last
+    nova_{{ nova_service }}_all_down:  # down tier: the state=up value equals zero
+      description: 'All Nova {{ nova_service }}s are down'
+      severity: down
+      rules:
+      - metric: openstack_nova_services
+        field:
+          service: {{ nova_service }}
+          state: up
+        relational_operator: '=='
+        threshold: 0
+        window: 60
+        periods: 0
+        function: last
+    {%- endfor %}
+    nova_total_free_vcpu_warning:  # warns when max(openstack_nova_total_free_vcpus) == 0
+      description: 'There is no VCPU available for new instances'
+      severity: warning
+      rules:
+      - metric: openstack_nova_total_free_vcpus
+        relational_operator: '=='
+        threshold: 0  # "no VCPU available" means zero free, as in the free-memory trigger
+        window: 60
+        periods: 0
+        function: max
+    nova_total_free_memory_warning:  # warns when max(openstack_nova_total_free_ram) == 0
+      description: 'There is no memory available for new instances'
+      severity: warning
+      rules:
+      - metric: openstack_nova_total_free_ram
+        relational_operator: '=='
+        threshold: 0  # zero free RAM, per the description above
+        window: 60
+        periods: 0
+        function: max
+    {%- endif %}
+  alarm:
+    {%- if pillar.nova.controller is defined %}
+    nova_api_check:  # wraps the remote nova-api endpoint check
+      alerting: enabled  # string value, as used by every other alarm in this file
+      triggers:
+      - nova_api_check_failed
+      dimension:
+        service: nova-api-check
+    {%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
+    nova_{{ nova_service }}:  # one alarm per service, listing its three severity tiers
+      alerting: enabled  # string value, not a YAML boolean
+      triggers:
+      - nova_{{ nova_service }}_all_down
+      - nova_{{ nova_service }}_majority_down
+      - nova_{{ nova_service }}_one_down
+      dimension:
+        service: nova-{{ nova_service }}
+    {%- endfor %}
+    nova_free_vcpu:  # wraps the total-free-VCPU trigger
+      alerting: enabled
+      triggers:
+      - nova_total_free_vcpu_warning
+      dimension:
+        service: nova-free-vcpu  # value the aggregator's nova_free_vcpu cluster matches on
+    nova_free_memory:  # wraps the total-free-memory trigger
+      alerting: enabled
+      triggers:
+      - nova_total_free_memory_warning
+      dimension:
+        service: nova-free-memory  # value the aggregator's nova_free_memory cluster matches on
+    {%- endif %}
+aggregator:
+  alarm_cluster:
+    nova_fs:  # per-host filesystem alarms, rolled up under nova-compute
+      policy: majority_of_members  # NOTE(review): only cluster in this file with this policy - confirm it is intended
+      group_by: hostname
+      match:
+        service: nova-fs
+      members:
+      - nova_fs
+      dimension:
+        service: nova-compute  # value the nova_compute cluster matches on
+    nova_logs_compute:  # per-host compute log alarms, rolled up under nova-compute
+      policy: highest_severity
+      group_by: hostname
+      match:
+        service: nova-logs-compute
+      members:
+      - nova_logs_compute
+      dimension:
+        service: nova-compute
+    nova_logs:  # per-host controller log alarms, rolled up under nova-control
+      policy: highest_severity
+      group_by: hostname
+      match:
+        service: nova-logs
+      members:
+      - nova_logs
+      dimension:
+        service: nova-control  # value the nova_control cluster matches on
+    nova_api_endpoint:  # per-host local API checks, rolled up under nova-control
+      policy: availability_of_members
+      group_by: hostname
+      match:
+        service: nova-api-endpoint
+      members:
+      - nova_api_endpoint
+      dimension:
+        service: nova-control
+    nova_api_check:  # remote API check; no group_by, unlike the per-host clusters above
+      policy: highest_severity
+      match:
+        service: nova-api-check
+      members:
+      - nova_api_check
+      dimension:
+        service: nova-control
+    {%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
+    nova_{{ nova_service }}:  # one cluster per service; nova_control lists these names as members
+      policy: highest_severity
+      match:
+        service: nova-{{ nova_service }}
+      members: [nova_{{ nova_service }}]  # single member, written inline
+      dimension:
+        service: nova-control
+    {%- endfor %}
+    nova_free_vcpu:  # free-VCPU alarm, rolled up under nova-compute; no group_by
+      policy: highest_severity
+      match:
+        service: nova-free-vcpu
+      members:
+      - nova_free_vcpu
+      dimension:
+        service: nova-compute  # value the nova_compute cluster matches on
+    nova_free_memory:  # free-memory alarm, rolled up under nova-compute; no group_by
+      policy: highest_severity
+      match:
+        service: nova-free-memory
+      members:
+      - nova_free_memory
+      dimension:
+        service: nova-compute
+    nova_control:  # top-level cluster over everything tagged service=nova-control
+      policy: highest_severity
+      match:
+        service: nova-control
+      members:
+      - nova_logs
+      - nova_api_endpoint
+      - nova_api_check
+      {%- for nova_service in ('cert', 'consoleauth', 'compute', 'conductor', 'scheduler') %}
+      - nova_{{ nova_service }}
+      {%- endfor %}
+      dimension:
+        cluster_name: nova-control  # same value the Grafana dashboard filters cluster_name on
+    nova_compute:  # top-level cluster over everything tagged service=nova-compute
+      policy: highest_severity
+      match:
+        service: nova-compute
+      members:
+      - nova_fs
+      - nova_logs_compute
+      - nova_free_vcpu
+      - nova_free_memory
+      dimension:
+        cluster_name: nova-compute  # same value the Grafana dashboard filters cluster_name on