Add remote check of the apiserver through the VIP
Depends-On: Ibe7f825cc9de32c2f96c8104dadd98353af1b7d9
Change-Id: I92346f2e4c62a3c6c685ff5d0e7d84227cb40229
diff --git a/kubernetes/meta/collectd.yml b/kubernetes/meta/collectd.yml
index 148099e..8d1b5fa 100644
--- a/kubernetes/meta/collectd.yml
+++ b/kubernetes/meta/collectd.yml
@@ -92,3 +92,21 @@
match: confd .*/etc/calico/confd
{%- endif %}
{%- endif %}
+
+{%- if pool.get('enabled', False) %}
+remote_plugin:  # remote check of the apiserver through the VIP; rendered only when the pool role is enabled
+ collectd_http_check:
+ polling_interval: 30
+ url:
+ apiserver:  # HTTPS probe of the apiserver /healthz endpoint (see url below)
+ expected_code: 200
+ expected_content: ok
+ # Certificate verification is disabled because urllib3 doesn't check for IP
+ # addresses in the DNS entries of the certificate's alternative names.
+ # See https://github.com/shazow/urllib3/issues/258
+ verify: false
+ client_cert: /etc/kubernetes/ssl/kubelet-client.crt
+ client_key: /etc/kubernetes/ssl/kubelet-client.key
+ url: https://{{ pool.apiserver.host }}:{{ pool.apiserver.port|default('443') }}/healthz  # port falls back to 443 when undefined
+ metric_name: k8s_service_health_vip  # metric read by the k8s-apiserver_vip trigger in heka.yml
+{%- endif %}
diff --git a/kubernetes/meta/heka.yml b/kubernetes/meta/heka.yml
index 476156b..d3390f1 100644
--- a/kubernetes/meta/heka.yml
+++ b/kubernetes/meta/heka.yml
@@ -274,8 +274,42 @@
{%- endif %}
{%- if master.get('enabled', False) %}
+remote_collector:  # alarm on the VIP health metric; rendered only when the master role is enabled
+ trigger:
+ k8s-apiserver_vip:
+ description: 'K8s apiserver is down'
+ severity: down
+ rules:
+ - metric: k8s_service_health_vip  # metric_name of the collectd_http_check in collectd.yml
+ field:
+ service: apiserver
+ relational_operator: '=='
+ threshold: 0  # with '==': rule matches a value of 0 (presumably a failed check -- confirm)
+ window: 60
+ periods: 0
+ function: last
+ alarm:
+ k8s-apiserver-vip:
+ alerting: enabled
+ triggers:
+ - k8s-apiserver_vip  # the trigger defined above
+ dimension:
+ service: k8s-apiserver-vip  # value matched by the k8s-apiserver_vip alarm_cluster
+{%- endif %}
+
+{%- if master.get('enabled', False) %}
aggregator:
alarm_cluster:
+ k8s-apiserver_vip:
+ policy: highest_severity
+ alerting: enabled
+ match:
+ service: k8s-apiserver-vip
+ members:
+ - k8s-apiserver-vip
+ dimension:
+ service: k8s-master
+ nagios_host: 01-service-clusters
{%- for kube_service in kube_services %}
k8s-{{ kube_service }}_endpoint:
policy: availability_of_members
@@ -343,6 +377,7 @@
- k8s-apiserver_endpoint
- k8s-scheduler_endpoint
- k8s-controller-manager_endpoint
+ - k8s-apiserver_vip
dimension:
cluster_name: k8s-master
nagios_host: 00-top-clusters