Add alarms

Change-Id: I805f3298dd5f69f7b4535f949eb943a160665f6c
diff --git a/opencontrail/meta/heka.yml b/opencontrail/meta/heka.yml
index 069dd0f..1f09ef5 100644
--- a/opencontrail/meta/heka.yml
+++ b/opencontrail/meta/heka.yml
@@ -1,32 +1,32 @@
 {%- if pillar.opencontrail is defined %}
-{%- if pillar.opencontrail.control is defined %}
-{%- from "opencontrail/map.jinja" import control with context %}
-{%- if control.get('enabled', False) %}
-{%- set controller_ref = control %}
-{%- endif %}
-{%- elif pillar.opencontrail.compute is defined %}
-{%- from "opencontrail/map.jinja" import compute with context %}
-{%- if compute.get('enabled', False) %}
-{%- set compute_ref = compute %}
-{%- endif %}
-{%- endif %}
-{%- if pillar.opencontrail.web is defined %}
-{%- from "opencontrail/map.jinja" import web with context %}
-{%- if web.get('enabled', False) %}
-{%- set web_ref = web %}
-{%- endif %}
-{%- endif %}
+  {%- if pillar.opencontrail.control is defined %}
+    {%- from "opencontrail/map.jinja" import control with context %}
+    {%- if control.get('enabled', False) %}
+      {%- set controller_ref = control %}
+    {%- endif %}
+  {%- elif pillar.opencontrail.compute is defined %}
+    {%- from "opencontrail/map.jinja" import compute with context %}
+    {%- if compute.get('enabled', False) %}
+      {%- set compute_ref = compute %}
+    {%- endif %}
+  {%- endif %}
+  {%- if pillar.opencontrail.web is defined %}
+    {%- from "opencontrail/map.jinja" import web with context %}
+    {%- if web.get('enabled', False) %}
+      {%- set web_ref = web %}
+    {%- endif %}
+  {%- endif %}
 {%- endif %}
 
 {%- if controller_ref is defined or compute_ref is defined %}
 log_collector:
-{%- if controller_ref is defined %}
+  {%- if controller_ref is defined %}
   splitter:
     java:
       engine: regex
       delimiter: '\n([0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2},[0-9]{3} - )'
       delimiter_eol: false
-{%- endif %}
+  {%- endif %}
   decoder:
     contrail:
       engine: sandbox
@@ -38,7 +38,7 @@
       module_file: /usr/share/lma_collector/decoders/contrail_supervisor_log.lua
       module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
       adjust_timezone: true
-{%- if controller_ref is defined %}
+  {%- if controller_ref is defined %}
     contrail_collector:
       engine: sandbox
       module_file: /usr/share/lma_collector/decoders/contrail_collector_log.lua
@@ -64,14 +64,14 @@
       module_file: /usr/share/lma_collector/decoders/ifmap.lua
       module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
       adjust_timezone: true
-{%- endif %}
-{%- if web_ref is defined %}
+  {%- endif %}
+  {%- if web_ref is defined %}
     redis:
       engine: sandbox
       module_file: /usr/share/lma_collector/decoders/redis.lua
       module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
       adjust_timezone: true
-{%- endif %}
+  {%- endif %}
   input:
     contrail_supervisor_log:
       engine: logstreamer
@@ -89,7 +89,7 @@
       priority: ["^Seq"]
       decoder: "contrail_supervisor_decoder"
       splitter: "TokenSplitter"
-{%- if controller_ref is defined %}
+  {%- if controller_ref is defined %}
     contrail_collector_log:
       engine: logstreamer
       log_directory: "/var/log"
@@ -130,7 +130,7 @@
       priority: ["^Seq"]
       decoder: "ifmap_decoder"
       splitter: "java_splitter"
-{%- endif %}
+  {%- endif %}
     contrail_main_log:
       engine: logstreamer
       log_directory: "/var/log"
@@ -139,7 +139,7 @@
       priority: ["^Seq"]
       decoder: "contrail_decoder"
       splitter: "TokenSplitter"
-{%- if web_ref is defined and web_ref.get('cache', {}).get('engine', '') == 'redis' %}
+  {%- if web_ref is defined and web_ref.get('cache', {}).get('engine', '') == 'redis' %}
     redis_log:
       engine: logstreamer
       log_directory: "/var/log"
@@ -148,5 +148,454 @@
       priority: ["^Seq"]
       decoder: "redis_decoder"
       splitter: "TokenSplitter"
-{%- endif %}
+  {%- endif %}
+metric_collector:
+  trigger:
+  {%- if controller_ref is defined %}
+    {%- for contrail_process in ('contrail-control', 'contrail-control-nodemgr',
+    'contrail-dns', 'contrail-named', 'contrail-alarm-gen', 'contrail-analytics-api',
+    'contrail-analytics-nodemgr', 'contrail-collector', 'contrail-query-engine',
+    'contrail-snmp-collector', 'contrail-topology', 'contrail-api', 'contrail-config-nodemgr',
+    'contrail-device-manager', 'contrail-discovery', 'contrail-schema', 'contrail-svc-monitor',
+    'contrail-database-nodemgr', 'ifmap', 'kafka', 'cassandra', 'zookeeper'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      description: "There is no {{ contrail_process }} process running"
+      severity: critical
+      rules:
+      - metric: lma_components_processes
+        fields:
+          service: '{{ contrail_process }}'
+        relational_operator: '=='
+        threshold: 0
+        window: 60
+        periods: 0
+        function: min
+    {%- endfor %}
+    xmpp_number_of_sessions_lo:
+      description: "There are no xmpp-number-of-sessions"
+      severity: warning
+      rules:
+      - metric: xmpp-number-of-sessions
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    xmpp_number_of_sessions_hi:
+      description: "There are too many xmpp-number-of-sessions"
+      severity: warning
+      rules:
+      - metric: xmpp-number-of-sessions
+        relational_operator: '>='  # upper bound: fire at or above the threshold, not only on an exact match
+        threshold: 500
+        window: 100
+        periods: 0
+        function: min
+    xmpp_number_of_sessions_diff:
+      description: "Number of xmpp-number-of-sessions changed between checks is too high"
+      severity: warning
+      rules:
+      - metric: xmpp-number-of-sessions
+        relational_operator: '>='
+        threshold: 100
+        window: 100
+        periods: 0
+        function: diff
+    vrouter_xmpp_of_sessions_lo:
+      description: "There are no vrouter-xmpp sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-xmpp
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    vrouter_xmpp_of_sessions_hi:
+      description: "There are too many vrouter-xmpp sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-xmpp  # same metric name as the _lo and _diff rules
+        relational_operator: '>='  # upper bound: fire at or above the threshold
+        threshold: 10
+        window: 100
+        periods: 0
+        function: min
+    vrouter_xmpp_of_sessions_diff:
+      description: "Number of vrouter-xmpp changed between checks is too high"
+      severity: warning
+      rules:
+      - metric: vrouter-xmpp
+        relational_operator: '>='
+        threshold: 5
+        window: 100
+        periods: 0
+        function: diff
+    vrouter_xmpp_dns_of_sessions_lo:
+      description: "There are no vrouter-dns-xmpp sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-dns-xmpp
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    vrouter_xmpp_dns_of_sessions_hi:
+      description: "There are too many vrouter-dns-xmpp sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-dns-xmpp  # same metric name as the _lo and _diff rules
+        relational_operator: '>='  # upper bound: fire at or above the threshold
+        threshold: 10
+        window: 100
+        periods: 0
+        function: min
+    vrouter_xmpp_dns_of_sessions_diff:
+      description: "Number of vrouter-dns-xmpp changed between checks is too high"
+      severity: warning
+      rules:
+      - metric: vrouter-dns-xmpp
+        relational_operator: '>='
+        threshold: 5
+        window: 100
+        periods: 0
+        function: diff
+    vrouter_lls_sessions_lo:
+      description: "There are no vrouter-vrouter-lls sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-lls
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    vrouter_lls_sessions_hi:
+      description: "There are too many vrouter-vrouter-lls sessions"
+      severity: warning
+      rules:
+      - metric: vrouter-lls
+        relational_operator: '>='  # upper bound: fire at or above the threshold, not only on an exact match
+        threshold: 10
+        window: 100
+        periods: 0
+        function: min
+    vrouter_lls_of_sessions_diff:
+      description: "Number of vrouter-vrouter-lls changed between checks is too high"
+      severity: warning
+      rules:
+      - metric: vrouter-lls
+        relational_operator: '>='
+        threshold: 5
+        window: 100
+        periods: 0
+        function: diff
+    xmpp_number_of_sessions_up:
+      description: "There are no active XMPP sessions"
+      severity: warning
+      rules:
+      - metric: xmpp-number-of-sessions-up
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    xmpp_number_of_sessions_down:
+      description: "There are inactive XMPP sessions"
+      severity: warning
+      rules:
+      - metric: xmpp-number-of-sessions-down
+        relational_operator: '>='
+        threshold: 1
+        window: 100
+        periods: 0
+        function: min
+    bgp_session_number:
+      description: "There are no BGP sessions"
+      severity: warning
+      rules:
+      - metric: bgp-session-number
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    bgp_session_number_up:
+      description: "There are no active BGP sessions"
+      severity: warning
+      rules:
+      - metric: bgp-session-number-up
+        relational_operator: '=='
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+    bgp_session_number_down:
+      description: "There are inactive BGP sessions"
+      severity: warning
+      rules:
+      - metric: bgp-session-number-down
+        relational_operator: '>='
+        threshold: 1
+        window: 100
+        periods: 0
+        function: min
+    vrouter_openedsockets:
+      description: "There are too many sockets opened for vRouter"
+      severity: warning
+      rules:
+      - metric: vrouter-openedsockets
+        relational_operator: '>='
+        threshold: 0  # NOTE(review): '>= 0' holds for any non-negative value, so this "too many" rule presumably always matches - confirm the intended limit
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_active:
+      description: "There are too many vrouter flows"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-active
+        relational_operator: '>='
+        threshold: 1200
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_zero_active:
+      description: "There are no active flows"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-active
+        relational_operator: '=='
+        threshold: 0
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_created:
+      description: "There are too many vrouter flows created"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-created
+        relational_operator: '>='
+        threshold: 1000
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_discard:
+      description: "There are too many vrouter flows: discards"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-discard
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_drop:
+      description: "There are too many vrouter flows: drops"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-drop
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_frag_err:
+      description: "There are too many vrouter flows: fragment errors"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-frag-err
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_invalid_nh:
+      description: "There are too many vrouter flows: invalid_nh"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-invalid-nh
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_composite_invalid_interface:
+      description: "There are too many vrouter flows: composite_invalid_interface"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-composite-invalid-interface
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_invalid_label:
+      description: "There are too many vrouter flows: invalid_label"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-invalid-label
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flows_flow_queue_limit_exceeded:
+      description: "There are too many vrouter flows: flow_queue_limit_exceeded"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-flow-queue-limit-exceeded
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    vrouter_flow_full:
+      description: "There are too many vrouter flows: flow_table_full"
+      severity: warning
+      rules:
+      - metric: vrouter-flows-flow-table-full
+        relational_operator: '>='
+        threshold: 100
+        window: 120
+        periods: 0
+        function: min
+    cassandra_cluster_endpoint_down:
+      description: "Cassandra Cluster Endpoint is down"
+      severity: critical
+      rules:
+      - metric: DownEndpointCount
+        relational_operator: '>'
+        threshold: 0
+        window: 100
+        periods: 0
+        function: min
+  {%- endif %}
+  {%- if web_ref is defined %}
+    {%- for contrail_process in ('contrail-webui', 'contrail-webui-middleware'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      description: "There is no {{ contrail_process }} process running"
+      severity: critical
+      rules:
+      - metric: lma_components_processes
+        fields:
+          service: '{{ contrail_process }}'
+        relational_operator: '=='
+        threshold: 0
+        window: 60
+        periods: 0
+        function: min
+    {%- endfor %}
+  {%- endif %}
+  {%- if compute_ref is defined %}
+    {%- for contrail_process in ('contrail-vrouter-agent', 'contrail-vrouter-nodemgr'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      description: "There is no {{ contrail_process }} process running"
+      severity: critical
+      rules:
+      - metric: lma_components_processes
+        fields:
+          service: '{{ contrail_process }}'  # raw process name, as in the controller and web trigger loops
+        relational_operator: '=='
+        threshold: 0
+        window: 60
+        periods: 0
+        function: min
+    {%- endfor %}
+  {%- endif %}
+  alarm:
+  {%- if controller_ref is defined %}
+    {%- for contrail_process in ('contrail-control', 'contrail-control-nodemgr',
+    'contrail-dns', 'contrail-named', 'contrail-alarm-gen', 'contrail-analytics-api',
+    'contrail-analytics-nodemgr', 'contrail-collector', 'contrail-query-engine',
+    'contrail-snmp-collector', 'contrail-topology', 'contrail-api', 'contrail-config-nodemgr',
+    'contrail-device-manager', 'contrail-discovery', 'contrail-schema', 'contrail-svc-monitor',
+    'contrail-database-nodemgr', 'ifmap', 'kafka', 'cassandra', 'zookeeper'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      alerting: enabled
+      triggers:
+      - {{ contrail_process|replace("-", "_") }}
+      dimension:
+        service: contrail-processes-control
+    {%- endfor %}
+  {%- endif %}
+  {%- if web_ref is defined %}
+    {%- for contrail_process in ('contrail-webui', 'contrail-webui-middleware'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      alerting: enabled
+      triggers:
+      - {{ contrail_process|replace("-", "_") }}
+      dimension:
+        service: contrail-processes-control
+    {%- endfor %}
+  {%- endif %}
+  {%- if compute_ref is defined %}
+    {%- for contrail_process in ('contrail-vrouter-agent', 'contrail-vrouter-nodemgr'
+    ) %}
+    {{ contrail_process|replace("-", "_") }}:
+      alerting: enabled
+      triggers:
+      - {{ contrail_process|replace("-", "_") }}
+      dimension:
+        service: contrail-processes-compute
+    {%- endfor %}
+  {%- endif %}
+aggregator:
+  alarm_cluster:
+  {%- if controller_ref is defined %}
+    contrail_processes_control:
+      policy: highest_severity
+      group_by: hostname
+      alerting: enabled
+      match:
+        service: contrail-processes-control
+      members:  # alarm names as plain scalars; a trailing ':' would turn each item into a one-key mapping
+    {%- for contrail_process in ('contrail-control', 'contrail-control-nodemgr',
+    'contrail-dns', 'contrail-named', 'contrail-alarm-gen', 'contrail-analytics-api',
+    'contrail-analytics-nodemgr', 'contrail-collector', 'contrail-query-engine',
+    'contrail-snmp-collector', 'contrail-topology', 'contrail-api', 'contrail-config-nodemgr',
+    'contrail-device-manager', 'contrail-discovery', 'contrail-schema', 'contrail-svc-monitor',
+    'contrail-database-nodemgr', 'ifmap', 'kafka', 'cassandra', 'zookeeper'
+    ) %}
+      - {{ contrail_process|replace("-", "_") }}
+    {%- endfor %}
+      dimension:
+        service: contrail-control
+  {%- endif %}
+  {%- if web_ref is defined %}
+    contrail_processes_web:
+      policy: highest_severity
+      group_by: hostname
+      alerting: enabled
+      match:
+        service: contrail-processes-control  # the web UI alarms are emitted with this service dimension
+      members:  # alarm names as plain scalars; a trailing ':' would turn each item into a one-key mapping
+    {%- for contrail_process in ('contrail-webui', 'contrail-webui-middleware'
+    ) %}
+      - {{ contrail_process|replace("-", "_") }}
+    {%- endfor %}
+      dimension:
+        service: contrail-web
+  {%- endif %}
+  {%- if compute_ref is defined %}
+    contrail_processes_compute:
+      policy: highest_severity
+      group_by: hostname
+      alerting: enabled
+      match:
+        service: contrail-processes-compute
+      members:  # alarm names as plain scalars; a trailing ':' would turn each item into a one-key mapping
+    {%- for contrail_process in ('contrail-vrouter-agent', 'contrail-vrouter-nodemgr'
+    ) %}
+      - {{ contrail_process|replace("-", "_") }}
+    {%- endfor %}
+      dimension:
+        service: contrail-compute
+  {%- endif %}
 {%- endif %}