Monitor service replicas

This change configures new alerts that trigger when the number of
running instances of a Docker Swarm service doesn't match the
configured number of replicas.

Change-Id: I0051ec3601d15ed75c41b7185546f47bac8c995e
diff --git a/docker/meta/prometheus.yml b/docker/meta/prometheus.yml
index 9ff54a4..a84ad0e 100644
--- a/docker/meta/prometheus.yml
+++ b/docker/meta/prometheus.yml
@@ -1,13 +1,61 @@
+{% from "docker/map.jinja" import host, client with context %}
+
 server:
   alert:
+{#-
+  Node-level alert, rendered only when host.enabled is set.
+  The raw section keeps the Prometheus template placeholders away from Jinja.
+#}
+{%- if host.get('enabled') %}
     ProcstatRunningDockerd:
       if: >-
         procstat_running{process_name="dockerd"} == 0
-      {% raw %}
+  {%- raw %}
       labels:
         severity: warning
         service: docker
       annotations:
         summary: 'Dockerd service is down'
         description: 'Dockerd service is down on node {{ $labels.host }}'
-      {% endraw %}
+  {%- endraw %}
+{%- endif %}
+{#-
+  Per-service alerts, rendered for every Swarm service under client.stack
+  that declares a positive deploy.replicas:
+    - <Name>InvalidReplicasNumber (warning, only when replicas > 1): the
+      number of containers reporting metrics differs from the target.
+    - <Name>NoReplica (critical): no container reports metrics at all.
+  <Name> is the CamelCased <stack>_<service>; '-' and '.' are treated as
+  word separators so they never end up in the generated alert name.
+#}
+{%- if client.get('enabled') %}
+  {%- for stack_name, stack in client.get('stack', {})|dictsort %}
+    {%- for service_name, service in stack.get('service', {})|dictsort %}
+      {%- if service.get('deploy', {}).replicas is defined and service.deploy.replicas > 0 %}
+        {%- set full_service_name = "{}_{}".format(stack_name, service_name) %}
+        {%- set camel_case_name = full_service_name.replace('-', '_').replace('.', '_').split('_')|map('capitalize')|join('') %}
+        {%- set label_selector = 'com_docker_swarm_service_name="{}"'.format(full_service_name) %}
+        {%- if service.deploy.replicas > 1 %}
+    DockerService{{ camel_case_name }}InvalidReplicasNumber:
+      if: >-
+        count(count_over_time(docker_container_cpu_usage_percent{{ '{' + label_selector + '}' }}[1m])) != {{ service.deploy.replicas }}
+      labels:
+        severity: warning
+        service: docker
+      annotations:
+        summary: 'Docker Swarm service {{ full_service_name }} invalid number of replicas'
+        description: "{% raw %}{{ $value }}{% endraw %}/{{ service.deploy.replicas }} replicas are running for the Docker Swarm service '{{ full_service_name }}'."
+        {%- endif %}
+    DockerService{{ camel_case_name }}NoReplica:
+      if: >-
+        count(count_over_time(docker_container_cpu_usage_percent{{ '{' + label_selector + '}' }}[1m])) == 0 or absent(docker_container_cpu_usage_percent{{ '{' + label_selector + '}' }}) == 1
+      labels:
+        severity: critical
+        service: docker
+      annotations:
+        summary: 'Docker Swarm service {{ full_service_name }} down'
+        description: "No replicas are running for the Docker Swarm service '{{ full_service_name }}'."
+      {%- endif %}
+    {%- endfor %}
+  {%- endfor %}
+{%- endif %}