Merge "Fix alerts"
diff --git a/opencontrail/meta/prometheus.yml b/opencontrail/meta/prometheus.yml
index 6f86598..ed997a5 100644
--- a/opencontrail/meta/prometheus.yml
+++ b/opencontrail/meta/prometheus.yml
@@ -59,9 +59,9 @@
     enabled: true
 {%- if packages is defined %}
     packages:
-    {% for pkg in packages %}
+    {%- for pkg in packages %}
     - {{ pkg }}
-    {% endfor %}
+    {%- endfor %}
 {%- endif %}
 {%- if template is defined %}
     template:  {{ template }}
@@ -90,7 +90,7 @@
     {%- if control_processes is defined %}
       {%- for contrail_api in control_apis %}
         {%- set words = contrail_api.split('.') %}
-    {% for word in words %}{%- if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
+    {% for word in words %}{% if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
       if: >-
         http_response_status{service=~"{{ contrail_api }}"} == 0
 {%- raw %}
@@ -113,13 +113,15 @@
         severity: down
         service: {{ contrail_process }}
       annotations:
-        summary: '{{ contrail_process }} service is down'
-        description: '{{ contrail_process }} service is down on node {% raw %}{{ $labels.host }}{% endraw %}'
+{%- raw %}
+        summary: '{{ $labels.service }} service is down'
+        description: '{{ $labels.service }} service is down on node {{ $labels.host }}'
+{%- endraw %}
       {%- endfor %}
 {%- raw %}
     ContrailBGPSessionsNoneUp:
       if: >-
-        max(contrail_bgp_session_up_count) == 0
+        max(contrail_bgp_session_up_count) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -129,7 +131,7 @@
         description: 'There are no active BGP sessions on node {{ $labels.host }}'
     ContrailBGPSessionsSomeDown:
       if: >-
-        min(contrail_bgp_session_down_count) > 0
+        min(contrail_bgp_session_down_count) by (host) > 0
       for: 2m
       labels:
         severity: warning
@@ -139,7 +141,7 @@
         description: 'There are inactive BGP sessions on node {{ $labels.host }}'
     ContrailBGPSessionsNone:
       if: >-
-        max(contrail_bgp_session_count) == 0
+        max(contrail_bgp_session_count) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -149,7 +151,7 @@
         description: 'There are no BGP sessions on node {{ $labels.host }}'
     ContrailXMPPSessionsNoneUp:
       if: >-
-        max(contrail_xmpp_session_up_count) == 0
+        max(contrail_xmpp_session_up_count) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -159,7 +161,7 @@
         description: 'There are no active XMPP sessions on node {{ $labels.host }}'
     ContrailXMPPSessionsSomeDown:
       if: >-
-        min(contrail_xmpp_session_down_count) > 0
+        min(contrail_xmpp_session_down_count) by (host) > 0
       for: 2m
       labels:
         severity: warning
@@ -169,7 +171,7 @@
         description: 'There are inactive XMPP sessions on node {{ $labels.host }}'
     ContrailXMPPSessionsNone:
       if: >-
-        max(contrail_xmpp_session_count) == 0
+        max(contrail_xmpp_session_count) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -181,7 +183,7 @@
       if: >-
 {%- endraw %}
     {%- set xmpp_toohigh_threshold = monitoring.xmpp_sessions_too_high_threshold %}
-        min(contrail_xmpp_session_count) >= {{ xmpp_toohigh_threshold }}
+        min(contrail_xmpp_session_count) by (host) >= {{ xmpp_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -189,7 +191,7 @@
         service: contrail-control
       annotations:
         summary: 'Too many XMPP sessions'
-        description: 'There are too many XMPP sessions on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ xmpp_toohigh_threshold }})'
+        description: 'There are too many XMPP sessions on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ xmpp_toohigh_threshold }})'
     ContrailXMPPSessionsTooManyVariations:
       if: >-
     {%- set xmpp_variation_threshold = monitoring.xmpp_sessions_variation_threshold %}
@@ -200,13 +202,13 @@
         service: contrail-control
       annotations:
         summary: 'Number of XMPP sessions changed between checks is too high'
-        description: 'There are too many XMPP sessions changes on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ xmpp_variation_threshold }})'
+        description: 'There are too many XMPP sessions changes on node {{ $labels.host }} (current value={{ $value }}, threshold={% endraw %}{{ xmpp_variation_threshold }})'
     {%- endif %}
 
     {%- if collector_processes is defined %}
       {%- for contrail_api in collector_apis %}
         {%- set words = contrail_api.split('.') %}
-    {% for word in words %}{%- if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
+    {% for word in words %}{% if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
       if: >-
         http_response_status{service=~"{{ contrail_api }}"} == 0
 {%- raw %}
@@ -229,8 +231,10 @@
         severity: down
         service: {{ contrail_process }}
       annotations:
-        summary: '{{ contrail_process }} service is down'
-        description: '{{ contrail_process }} service is down on node {% raw %}{{ $labels.host }}{% endraw %}'
+{%- raw %}
+        summary: '{{ $labels.service }} service is down'
+        description: '{{ $labels.service }} service is down on node {{ $labels.host }}'
+{%- endraw %}
       {%- endfor %}
     {%- endif %}
 
@@ -238,7 +242,7 @@
 {%- raw %}
     ContrailVrouterXMPPSessionsNone:
       if: >-
-        max(contrail_vrouter_xmpp) == 0
+        max(contrail_vrouter_xmpp) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -250,7 +254,7 @@
       if: >-
 {%- endraw %}
     {%- set vrouter_xmpp_toohigh_threshold = monitoring.vrouter_xmpp_sessions_too_high_threshold %}
-        min(contrail_vrouter_xmpp) >= {{ vrouter_xmpp_toohigh_threshold }}
+        min(contrail_vrouter_xmpp) by (host) >= {{ vrouter_xmpp_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -273,7 +277,7 @@
 {%- raw %}
     ContrailVrouterDNSXMPPSessionsNone:
       if: >-
-        max(contrail_vrouter_dns_xmpp) == 0
+        max(contrail_vrouter_dns_xmpp) by (host) == 0
       for: 2m
       labels:
         severity: warning
@@ -285,7 +289,7 @@
       if: >-
 {%- endraw %}
     {%- set vrouter_dns_xmpp_toohigh_threshold = monitoring.vrouter_dns_xmpp_sessions_too_high_threshold %}
-        min(contrail_vrouter_dns_xmpp) >= {{ vrouter_dns_xmpp_toohigh_threshold }}
+        min(contrail_vrouter_dns_xmpp) by (host) >= {{ vrouter_dns_xmpp_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -308,7 +312,7 @@
     ContrailVrouterLLSSessionsTooMany:
       if: >-
     {%- set vrouter_lls_toohigh_threshold = monitoring.vrouter_lls_too_high_threshold %}
-        min(contrail_vrouter_lls) >= {{ vrouter_lls_toohigh_threshold }}
+        min(contrail_vrouter_lls) by (host) >= {{ vrouter_lls_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -331,7 +335,7 @@
     ContrailFlowsActiveTooMany:
       if: >-
     {%- set vrouter_flows_active_toohigh_threshold = monitoring.vrouter_flows_active_too_high_threshold %}
-        min(contrail_vrouter_flows_active) >= {{ vrouter_flows_active_toohigh_threshold }}
+        min(contrail_vrouter_flows_active) by (host) >= {{ vrouter_flows_active_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -343,7 +347,7 @@
     ContrailFlowsDiscardTooMany:
       if: >-
     {%- set vrouter_flows_discard_toohigh_threshold = monitoring.vrouter_flows_discard_too_high_threshold %}
-        min(contrail_vrouter_flows_discard) >= {{ vrouter_flows_discard_toohigh_threshold }}
+        min(contrail_vrouter_flows_discard) by (host) >= {{ vrouter_flows_discard_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -355,7 +359,7 @@
     ContrailFlowsDropTooMany:
       if: >-
     {%- set vrouter_flows_flow_action_drop_toohigh_threshold = monitoring.vrouter_flows_flow_action_drop_too_high_threshold %}
-        min(contrail_vrouter_flows_flow_action_drop) >= {{ vrouter_flows_flow_action_drop_toohigh_threshold }}
+        min(contrail_vrouter_flows_flow_action_drop) by (host) >= {{ vrouter_flows_flow_action_drop_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -367,7 +371,7 @@
     ContrailFlowsFragErrTooMany:
       if: >-
     {%- set vrouter_flows_frag_err_toohigh_threshold = monitoring.vrouter_flows_frag_err_too_high_threshold %}
-        min(contrail_vrouter_flows_frag_err) >= {{ vrouter_flows_frag_err_toohigh_threshold }}
+        min(contrail_vrouter_flows_frag_err) by (host) >= {{ vrouter_flows_frag_err_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -379,7 +383,7 @@
     ContrailFlowsInvalidNHTooMany:
       if: >-
     {%- set vrouter_flows_invalid_nh_toohigh_threshold = monitoring.vrouter_flows_invalid_nh_too_high_threshold %}
-        min(contrail_vrouter_flows_invalid_nh) >= {{ vrouter_flows_invalid_nh_toohigh_threshold }}
+        min(contrail_vrouter_flows_invalid_nh) by (host) >= {{ vrouter_flows_invalid_nh_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -391,7 +395,7 @@
     ContrailFlowsInvalidITFTooMany:
       if: >-
     {%- set vrouter_flows_composite_invalid_interface_toohigh_threshold = monitoring.vrouter_flows_composite_invalid_interface_too_high_threshold %}
-        min(contrail_vrouter_flows_composite_invalid_interface) >= {{ vrouter_flows_composite_invalid_interface_toohigh_threshold }}
+        min(contrail_vrouter_flows_composite_invalid_interface) by (host) >= {{ vrouter_flows_composite_invalid_interface_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -403,7 +407,7 @@
     ContrailFlowsInvalidLabelTooMany:
       if: >-
     {%- set vrouter_flows_invalid_label_toohigh_threshold = monitoring.vrouter_flows_invalid_label_too_high_threshold %}
-        min(contrail_vrouter_flows_invalid_label) >= {{ vrouter_flows_invalid_label_toohigh_threshold }}
+        min(contrail_vrouter_flows_invalid_label) by (host) >= {{ vrouter_flows_invalid_label_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -415,7 +419,7 @@
     ContrailFlowsQueueLimitExceededTooMany:
       if: >-
     {%- set vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold = monitoring.vrouter_flows_flow_queue_limit_exceeded_too_high_threshold %}
-        min(contrail_vrouter_flows_flow_queue_limit_exceeded) >= {{ vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold }}
+        min(contrail_vrouter_flows_flow_queue_limit_exceeded) by (host) >= {{ vrouter_flows_flow_queue_limit_exceeded_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -427,7 +431,7 @@
     ContrailFlowsTableFullTooMany:
       if: >-
     {%- set vrouter_flows_flow_table_full_toohigh_threshold = monitoring.vrouter_flows_flow_table_full_too_high_threshold %}
-        min(contrail_vrouter_flows_flow_table_full) >= {{ vrouter_flows_flow_table_full_toohigh_threshold }}
+        min(contrail_vrouter_flows_flow_table_full) by (host) >= {{ vrouter_flows_flow_table_full_toohigh_threshold }}
 {%- raw %}
       for: 2m
       labels:
@@ -438,7 +442,7 @@
         description: 'There are too many vRouter flows with table full on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ vrouter_flows_flow_table_full_toohigh_threshold }})'
       {%- for contrail_api in compute_apis %}
         {%- set words = contrail_api.split('.') %}
-    {% for word in words %}{%- if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
+    {% for word in words %}{% if word != 'api' %}{{ word | capitalize }}{% endif %}{% endfor %}APIDown:
       if: >-
         http_response_status{service=~"{{ contrail_api }}"} == 0
 {%- raw %}
@@ -461,8 +465,10 @@
         severity: down
         service: {{ contrail_process }}
       annotations:
-        summary: '{{ contrail_process }} service is down'
-        description: '{{ contrail_process }} service is down on node {% raw %}{{ $labels.host }}{% endraw %}'
+{%- raw %}
+        summary: '{{ $labels.service }} service is down'
+        description: '{{ $labels.service }} service is down on node {{ $labels.host }}'
+{%- endraw %}
       {%- endfor %}
     {%- endif %}
 
@@ -476,8 +482,10 @@
         severity: down
         service: {{ contrail_process }}
       annotations:
-        summary: '{{ contrail_process }} service is down'
-        description: '{{ contrail_process }} service is down on node {% raw %}{{ $labels.host }}{% endraw %}'
+{%- raw %}
+        summary: '{{ $labels.service }} service is down'
+        description: '{{ $labels.service }} service is down on node {{ $labels.host }}'
+{%- endraw %}
       {%- endfor %}
     {%- endif %}
 
@@ -491,8 +499,10 @@
         severity: down
         service: {{ contrail_process }}
       annotations:
-        summary: '{{ contrail_process }} service is down'
-        description: '{{ contrail_process }} service is down on node {% raw %}{{ $labels.host }}{% endraw %}'
+{%- raw %}
+        summary: '{{ $labels.service }} service is down'
+        description: '{{ $labels.service }} service is down on node {{ $labels.host }}'
+{%- endraw %}
       {%- endfor %}
     {%- endif %}