Merge "Attempt to force restart on hyperkube version change"

commit: 461b89ae7a547d5625347a246677d7e7203be972 [log] [tgz]
author: Jedrzej Nowak <jnowak@mirantis.com> Thu Jun 14 10:49:26 2018 +0000
committer: Gerrit Code Review <gerrit2@99e8dbefe3b7> Thu Jun 14 10:49:26 2018 +0000
tree: 64eaf1e0097ed3b31e8f44d40e0c381c6793666b
parent: 46e0cd372ef5f4b3961be3e0f3b16654c81feb3a [diff]
parent: 72984cb1c8d18eb24a3fe2a5b4246964ca2fa240 [diff]
diff --git a/kubernetes/files/kube-addons/contrail/contrail.yaml b/kubernetes/files/kube-addons/contrail/contrail.yaml
index f023315..5d5ca58 100644
--- a/kubernetes/files/kube-addons/contrail/contrail.yaml
+++ b/kubernetes/files/kube-addons/contrail/contrail.yaml

@@ -1,3 +1,6 @@
+{%- from "kubernetes/map.jinja" import common with context -%}
+---
+
 apiVersion: apps/v1beta2
 kind: DaemonSet
 metadata:
@@ -19,7 +22,7 @@
       hostNetwork: true
       containers:
       - name: rabbitmq
-        image: rabbitmq:3.6.6-management-alpine
+        image: rabbitmq:{{ common.addons.get('contrail',{}).get('rabbitmq_version',"3.6.6") }}-management-alpine
         lifecycle:
           postStart:
             exec:
@@ -41,10 +44,10 @@
                 rabbitmqctl set_policy ha-all "." '{"ha-mode":"exactly","ha-params":3,"ha-sync-mode":"automatic"}'
         env:
         - name: RABBITMQ_ERLANG_COOKIE
-          value: YTQMGYEHFATZPDKPOCXX
+          value: {{ common.addons.get('contrail',{}).get('rabbitmq_erlang_cookie',"YTQMGYEHFATZPDKPOCXX") }}
 
       - name: opencontrail-controller
-        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-controller
+        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-controller:{{ common.addons.get('contrail',{}).get('contrail_version',"latest") }}
         securityContext:
           privileged: true
         lifecycle:
@@ -78,11 +81,10 @@
           mountPath: /etc/zookeeper/conf/zoo.cfg
         - name: etc-zookeeper-conf-log4j-properties
           mountPath: /etc/zookeeper/conf/log4j.properties
-        - name: var-lib-rabbitmq-erlang-cookie
-          mountPath: /var/lib/rabbitmq/.erlang.cookie
+
 
       - name: opencontrail-analyticsdb
-        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-analyticsdb
+        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-analyticsdb:{{ common.addons.get('contrail',{}).get('contrail_version',"latest") }}
         securityContext:
           privileged: true
         volumeMounts:
@@ -114,7 +116,7 @@
           mountPath: /etc/zookeeper/conf/log4j.properties
 
       - name: opencontrail-analytics
-        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-analytics
+        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-analytics:{{ common.addons.get('contrail',{}).get('contrail_version',"latest") }}
         volumeMounts:
         - name: etc-contrail
           mountPath: /etc/contrail
@@ -151,11 +153,6 @@
         hostPath:
           path: /etc/zookeeper/conf/zoo.cfg
           type: File
-      - name: var-lib-rabbitmq-erlang-cookie
-        hostPath:
-          path: /var/lib/rabbitmq/.erlang.cookie
-          type: File
-
 
       # analyticsdb
       - name: etc-cassandra-cassandra-env-analytics-sh

diff --git a/kubernetes/files/kube-addons/contrail/kube-manager.yaml b/kubernetes/files/kube-addons/contrail/kube-manager.yaml
index 7fd0e0e..3004649 100644
--- a/kubernetes/files/kube-addons/contrail/kube-manager.yaml
+++ b/kubernetes/files/kube-addons/contrail/kube-manager.yaml

@@ -1,3 +1,5 @@
+{%- from "kubernetes/map.jinja" import common with context -%}
+---
 apiVersion: apps/v1beta2
 kind: DaemonSet
 metadata:
@@ -19,7 +21,7 @@
       hostNetwork: true
       containers:
       - name: opencontrail-kube-manager
-        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-kube-manager
+        image: docker-prod-local.artifactory.mirantis.com/opencontrail-oc40/opencontrail-kube-manager:{{ common.addons.get('contrail',{}).get('contrail_version',"latest") }}
         securityContext:
           privileged: true
         lifecycle:

diff --git a/kubernetes/meta/prometheus.yml b/kubernetes/meta/prometheus.yml
index 3ca5453..e873d38 100644
--- a/kubernetes/meta/prometheus.yml
+++ b/kubernetes/meta/prometheus.yml

@@ -155,8 +155,8 @@
         severity: warning
         service: kubernetes
       annotations:
-        summary: "Failed to get the container metrics"
-        description: "Prometheus was not able to scrape metrics from the container on the {{ $labels.instance }} instance."
+        summary: "Failed to get Kubernetes container metrics"
+        description: "Prometheus was not able to scrape metrics from the container on the {{ $labels.instance }} Kubernetes instance."
     {% endraw %}
     KubernetesProcessDown:
       if: >-
@@ -168,7 +168,7 @@
         service: kubernetes
       annotations:
         summary: "Kubernetes {{ $labels.process_name }} process is down"
-        description: "Kubernetes {{ $labels.process_name }} process on the {{ $labels.host }} node is down for at least 2 minutes."
+        description: "Kubernetes {{ $labels.process_name }} process on the {{ $labels.host }} node is down for 2 minutes."
     {% endraw %}
     KubernetesProcessDownMinor:
       if: >-
@@ -179,9 +179,9 @@
         severity: minor
         service: kubernetes
       annotations:
-        summary: "{% endraw %}{{ instance_minor_threshold_percent * 100 }}%{% raw %} of Kubernetes {{ $labels.process_name }} process instances are down"
+        summary: "{% endraw %}{{ instance_minor_threshold_percent * 100 }}%{% raw %} of Kubernetes {{ $labels.process_name }} processes are down"
         description: >-
-          {{ $value }} of Kubernetes {{ $labels.process_name }} process instances are down {% endraw %}(at least {{ instance_minor_threshold_percent * 100 }}%) for at least 2 minutes.
+          {{ $value }} of Kubernetes {{ $labels.process_name }} processes (>= {% endraw %}{{ instance_minor_threshold_percent * 100 }}%) are down for 2 minutes.
     KubernetesProcessDownMajor:
       if: >-
         count(procstat_running{process_name=~"hyperkube-.*"} == 0) by (process_name) > count(procstat_running{process_name=~"hyperkube-.*"}) by (process_name) * {{ instance_major_threshold_percent }}
@@ -190,9 +190,9 @@
         severity: major
         service: kubernetes
       annotations:
-        summary: "{{ instance_major_threshold_percent * 100 }}%{% raw %} of Kubernetes {{ $labels.process_name }} process instances are down"
+        summary: "{{ instance_major_threshold_percent * 100 }}%{% raw %} of Kubernetes {{ $labels.process_name }} processes are down"
         description: >-
-          {{ $value }} of Kubernetes {{ $labels.process_name }} process instances are down {% endraw %}(at least {{ instance_major_threshold_percent * 100 }}%) for at least 2 minutes.
+          {{ $value }} of Kubernetes {{ $labels.process_name }} processes (>= {% endraw %}{{ instance_major_threshold_percent * 100 }}%) are down for 2 minutes.
     KubernetesProcessOutage:
       if: >-
         count(procstat_running{process_name=~"hyperkube-.*"}) by (process_name) == count(procstat_running{process_name=~"hyperkube-.*"} == 0) by (process_name)
@@ -203,7 +203,7 @@
         service: kubernetes
       annotations:
         summary: "Kubernetes {{ $labels.process_name }} cluster outage"
-        description: "All Kubernetes {{ $labels.process_name }} process instances are down for at least 2 minutes."
+        description: "All Kubernetes {{ $labels.process_name }} processes are down for 2 minutes."
     {% endraw %}
 {%- if network.get('calico', {}).get('enabled', False) %}
     CalicoProcessDown:
@@ -216,7 +216,7 @@
         service: calico
       annotations:
         summary: "Calico {{ $labels.process_name }} process is down"
-        description: "Calico {{ $labels.process_name }} process on the {{ $labels.host }} node is down for at least 2 minutes."
+        description: "Calico {{ $labels.process_name }} process on the {{ $labels.host }} node is down for 2 minutes."
     {% endraw %}
     CalicoProcessDownMinor:
       if: >-
@@ -226,9 +226,9 @@
         severity: minor
         service: calico
       annotations:
-        summary: "{{ instance_minor_threshold_percent * 100 }}%{% raw %} of Calico {{ $labels.process_name }} process instances are down"
+        summary: "{{ instance_minor_threshold_percent * 100 }}%{% raw %} of Calico {{ $labels.process_name }} processes are down"
         description: >-
-          {{ $value }} of Calico {{ $labels.process_name }} process instances are down {% endraw %}(at least {{ instance_minor_threshold_percent * 100 }}%) for at least 2 minutes.
+          {{ $value }} of Calico {{ $labels.process_name }} processes (>= {% endraw %}{{ instance_minor_threshold_percent * 100 }}%) are down for 2 minutes.
     CalicoProcessDownMajor:
       if: >-
         count(procstat_running{process_name=~"calico-felix|bird|bird6|confd"} == 0) by (process_name) > count(procstat_running{process_name=~"calico-felix|bird|bird6|confd"}) by (process_name) * {{ instance_major_threshold_percent }}
@@ -237,9 +237,9 @@
         severity: major
         service: calico
       annotations:
-        summary: "{{ instance_major_threshold_percent * 100 }}%{% raw %} of Calico {{ $labels.process_name }} process instances are down"
+        summary: "{{ instance_major_threshold_percent * 100 }}%{% raw %} of Calico {{ $labels.process_name }} processes are down"
         description: >-
-          {{ $value }} of Calico {{ $labels.process_name }} process instances are down {% endraw %}(at least {{ instance_major_threshold_percent * 100 }}%) for at least 2 minutes.
+          {{ $value }} of Calico {{ $labels.process_name }} processes (>= {% endraw %}{{ instance_major_threshold_percent * 100 }}%) are down for 2 minutes.
     CalicoProcessOutage:
       if: >-
         count(procstat_running{process_name=~"calico-felix|bird|bird6|confd"}) by (process_name) == count(procstat_running{process_name=~"calico-felix|bird|bird6|confd"} == 0) by (process_name)
@@ -250,6 +250,6 @@
         service: calico
       annotations:
         summary: "Calico {{ $labels.process_name }} cluster outage"
-        description: "All Calico {{ $labels.process_name }} process instances are down for at least 2 minutes."
+        description: "All Calico {{ $labels.process_name }} processes are down for 2 minutes."
     {% endraw %}
 {% endif %}
commit	461b89ae7a547d5625347a246677d7e7203be972	[log] [tgz]
author	Jedrzej Nowak <jnowak@mirantis.com>	Thu Jun 14 10:49:26 2018 +0000
committer	Gerrit Code Review <gerrit2@99e8dbefe3b7>	Thu Jun 14 10:49:26 2018 +0000
tree	64eaf1e0097ed3b31e8f44d40e0c381c6793666b
parent	46e0cd372ef5f4b3961be3e0f3b16654c81feb3a [diff]
parent	72984cb1c8d18eb24a3fe2a5b4246964ca2fa240 [diff]