Monitor Kubernetes nodes from the remote_collector
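
Node-status metrics are now collected from the remote_collector by
querying the apiserver over HTTPS with the kubelet client certificate,
instead of running kubectl on the master. Below is a minimal sketch of
how the new collectd_k8s_get Python module might consume the <Module>
options rendered from collectd_k8s_get.conf; the actual module is
delivered by the Depends-On change, so the names and the k8s_nodes
metric layout here are illustrative assumptions only:

    import collectd
    import requests

    CONF = {}

    def config_callback(conf):
        # Store the parsed <Module> options (Endpoint, ClientCert, ...)
        # keyed by lower-cased option name.
        for node in conf.children:
            CONF[node.key.lower()] = node.values[0]

    def read_callback():
        # Poll the apiserver node list with the kubelet client cert;
        # Verify "false" disables TLS certificate verification.
        resp = requests.get(
            CONF['endpoint'] + '/api/v1/nodes',
            cert=(CONF['clientcert'], CONF['clientkey']),
            verify=str(CONF.get('verify', 'false')).lower() == 'true',
        )
        nodes = resp.json().get('items', [])
        ready = sum(
            1 for n in nodes
            if any(c['type'] == 'Ready' and c['status'] == 'True'
                   for c in n['status']['conditions'])
        )
        # Dispatch one gauge per status so the alarms can match on the
        # status field (ready / not_ready).
        for status, count in (('ready', ready),
                              ('not_ready', len(nodes) - ready)):
            collectd.Values(plugin='k8s_nodes', type='gauge',
                            type_instance=status,
                            values=[count]).dispatch()

    collectd.register_config(config_callback)
    collectd.register_read(read_callback)
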
Depends-On: I530f1c3f3c93a25402a1e0a62b035ed209414a1f
Change-Id: I0045fb017f6883bb9ae743de5c6c74c08e559a42
diff --git a/kubernetes/files/collectd_k8s_get.conf b/kubernetes/files/collectd_k8s_get.conf
new file mode 100644
index 0000000..d7754b4
--- /dev/null
+++ b/kubernetes/files/collectd_k8s_get.conf
@@ -0,0 +1,10 @@
+Import "collectd_k8s_get"
+
+<Module "collectd_k8s_get">
+ Polling "{{ plugin.interval }}"
+ PollingInterval "{{ plugin.polling_interval }}"
+ Endpoint "{{ plugin.endpoint }}"
+ ClientCert "{{ plugin.client_cert }}"
+ ClientKey "{{ plugin.client_key }}"
+ Verify "{{ plugin.verify }}"
+</Module>
diff --git a/kubernetes/files/collectd_kubectl_get.conf b/kubernetes/files/collectd_kubectl_get.conf
deleted file mode 100644
index 1ede3ab..0000000
--- a/kubernetes/files/collectd_kubectl_get.conf
+++ /dev/null
@@ -1,7 +0,0 @@
-Import "collectd_k8s_kubectl_get"
-
-<Module "collectd_k8s_kubectl_get">
- Polling "{{ plugin.interval }}"
- PollingInterval "{{ plugin.polling_interval }}"
- GetNodes "{{ plugin.get_nodes }}"
-</Module>
diff --git a/kubernetes/meta/collectd.yml b/kubernetes/meta/collectd.yml
index 8d1b5fa..c1f7f04 100644
--- a/kubernetes/meta/collectd.yml
+++ b/kubernetes/meta/collectd.yml
@@ -54,14 +54,6 @@
url: http://127.0.0.1:10249/healthz
metric_name: k8s_service_health
{%- endif %}
-{%- if master.get('enabled', False) %}
- collectd_kubectl_get:
- plugin: python
- template: kubernetes/files/collectd_kubectl_get.conf
- polling_interval: 60
- interval: 30
- get_nodes: true
-{%- endif %}
collectd_processes:
process:
@@ -109,4 +101,13 @@
client_key: /etc/kubernetes/ssl/kubelet-client.key
url: https://{{ pool.apiserver.host }}:{{ pool.apiserver.port|default('443') }}/healthz
metric_name: k8s_service_health_vip
+ collectd_k8s_get:
+ plugin: python
+ template: kubernetes/files/collectd_k8s_get.conf
+ polling_interval: 60
+ interval: 30
+ verify: false
+ client_cert: /etc/kubernetes/ssl/kubelet-client.crt
+ client_key: /etc/kubernetes/ssl/kubelet-client.key
+ endpoint: https://{{ pool.apiserver.host }}:{{ pool.apiserver.port|default('443') }}
{%- endif %}
diff --git a/kubernetes/meta/heka.yml b/kubernetes/meta/heka.yml
index d3390f1..21d85c5 100644
--- a/kubernetes/meta/heka.yml
+++ b/kubernetes/meta/heka.yml
@@ -79,43 +79,6 @@
periods: 0
function: last
{%- endfor %}
- k8s_node_some_not_ready:
- description: 'Some k8s nodes are not ready'
- severity: warning
- logical_operator: and
- rules:
- - metric: k8s_nodes
- field:
- status: not_ready
- relational_operator: '>'
- threshold: 0
- window: 120
- periods: 0
- function: last
- k8s_node_majority_not_ready:
- description: 'Majority of k8s nodes are not ready'
- severity: critical
- rules:
- - metric: k8s_nodes_percent
- field:
- status: not_ready
- relational_operator: '>'
- threshold: 50
- window: 120
- periods: 0
- function: last
- k8s_node_all_not_ready:
- description: 'All k8s node are not ready'
- severity: down
- rules:
- - metric: k8s_nodes_percent
- field:
- status: not_ready
- relational_operator: '=='
- threshold: 100
- window: 60
- periods: 0
- function: last
{%- endif %}
{%- if pool.get('enabled', False) %}
k8s-kubelet_local_endpoint:
@@ -216,14 +179,6 @@
dimension:
service: k8s-{{ kube_service }}-endpoint
{%- endfor %}
- k8s-nodes-not-ready:
- alerting: enabled
- triggers:
- - k8s_node_all_not_ready
- - k8s_node_majority_not_ready
- - k8s_node_some_not_ready
- dimension:
- service: k8s-nodes
{%- endif %}
{%- if pool.get('enabled', False) %}
k8s-kubelet_endpoint:
@@ -288,6 +243,43 @@
window: 60
periods: 0
function: last
+ k8s_node_some_not_ready:
+ description: 'Some k8s nodes are not ready'
+ severity: warning
+ logical_operator: and
+ rules:
+ - metric: k8s_nodes
+ field:
+ status: not_ready
+ relational_operator: '>'
+ threshold: 0
+ window: 120
+ periods: 0
+ function: last
+ k8s_node_majority_not_ready:
+ description: 'Majority of k8s nodes are not ready'
+ severity: critical
+ rules:
+ - metric: k8s_nodes_percent
+ field:
+ status: not_ready
+ relational_operator: '>'
+ threshold: 50
+ window: 120
+ periods: 0
+ function: last
+ k8s_node_all_not_ready:
+ description: 'All k8s nodes are not ready'
+ severity: down
+ rules:
+ - metric: k8s_nodes_percent
+ field:
+ status: not_ready
+ relational_operator: '=='
+ threshold: 100
+ window: 60
+ periods: 0
+ function: last
alarm:
k8s-apiserver-vip:
alerting: enabled
@@ -295,6 +287,14 @@
- k8s-apiserver_vip
dimension:
service: k8s-apiserver-vip
+ k8s-nodes-not-ready:
+ alerting: enabled
+ triggers:
+ - k8s_node_all_not_ready
+ - k8s_node_majority_not_ready
+ - k8s_node_some_not_ready
+ dimension:
+ service: k8s-nodes
{%- endif %}
{%- if master.get('enabled', False) %}
@@ -348,7 +348,7 @@
k8s-nodes:
policy: highest_severity
alerting: enabled
- group_by: hostname
+ group_by: member
match:
service: k8s-nodes
members: