From: Martin Polreich
Date: Tue, 25 Jul 2017 11:48:37 +0000 (+0000)
Subject: Merge "Move suites definition from .travis.yml to .kitchen.yml"
X-Git-Url: https://gerrit.mcp.mirantis.com/gitweb?p=salt-formulas%2Felasticsearch.git;a=commitdiff_plain;h=bf651faa5c4231feb5b9b10aa1d4c77d67180317;hp=970ced8dd6c4a7f9521e85b99b35ddb3300fc760

Merge "Move suites definition from .travis.yml to .kitchen.yml"
---
Review notes (free-form text between "---" and the first "diff --git" line):

  * elasticsearch/files/telegraf.conf (new file): Jinja template for an
    [[inputs.elasticsearch]] section, filled from values.servers,
    values.http_timeout (default "5s"), values.local, values.cluster_health
    and values.cluster_stats.
  * elasticsearch/meta/prometheus.yml: the two cluster-health alerts are now
    emitted when client.enabled is set, the remaining alerts when
    server.enabled is set; max_over_time(...[5m]) is dropped from both
    cluster-health expressions.
  * elasticsearch/meta/telegraf.yml: the server-side "agent" input now has
    cluster_health: false, and a "remote_agent" input is added for an
    enabled client, with namepass 'elasticsearch_cluster_health*'.

  NOTE(review): this copy of the patch was recovered from a
  whitespace-collapsed paste. Line breaks were restored so that every hunk
  matches the counts in its "@@" header; the leading indentation inside the
  hunks is reconstructed by convention - confirm against the repository
  before applying.

  NOTE(review): the YELLOW/RED alert descriptions (context lines, not changed
  here) still read "for the last 5 minutes" although the [5m] window is
  removed from the expressions - confirm whether that wording is intended.

diff --git a/elasticsearch/files/telegraf.conf b/elasticsearch/files/telegraf.conf
new file mode 100644
index 0000000..a987469
--- /dev/null
+++ b/elasticsearch/files/telegraf.conf
@@ -0,0 +1,7 @@
+[[inputs.elasticsearch]]
+  servers = [{%- for server in values.servers|default([]) %}"{{ server }}"{%- if not loop.last%}, {% endif %} {%- endfor %}]
+  http_timeout = "{{ values.http_timeout|default("5s") }}"
+  local = {%- if values.local %}true{%- else %}false{%- endif %}
+  cluster_health = {%- if values.cluster_health %}true{%- else %}false{%- endif %}
+  cluster_stats = {%- if values.cluster_stats %}true{%- else %}false{%- endif %}
+{%- include 'telegraf/files/input/_filters.conf' %}
diff --git a/elasticsearch/meta/prometheus.yml b/elasticsearch/meta/prometheus.yml
index 464111d..f0aa983 100644
--- a/elasticsearch/meta/prometheus.yml
+++ b/elasticsearch/meta/prometheus.yml
@@ -1,19 +1,13 @@
-{%- if pillar.elasticsearch.server is defined %}
-{% raw %}
+{%- if pillar.elasticsearch.server is defined or pillar.elasticsearch.client is defined %}
+{%- from "elasticsearch/map.jinja" import server, client with context %}
+
 server:
   alert:
-    ElasticsearchDown:
-      if: >-
-        elasticsearch_up != 1
-      labels:
-        severity: warning
-        service: elasticsearch
-      annotations:
-        summary: 'Elasticsearch service down'
-        description: 'Elasticsearch service is down on node {{ $labels.host }}'
+{%- if client.get('enabled', False) %}
+{%- raw %}
     ElasticsearchClusterHealthStatusYellow:
       if: >-
-        max_over_time(elasticsearch_cluster_health_status[5m]) == 2
+        elasticsearch_cluster_health_status == 2
       labels:
         severity: warning
         service: elasticsearch
@@ -23,7 +17,7 @@ server:
           The Elasticsearch cluster status is YELLOW for the last 5 minutes.
     ElasticsearchClusterHealthStatusRed:
       if: >-
-        max_over_time(elasticsearch_cluster_health_status[5m]) == 3
+        elasticsearch_cluster_health_status == 3
       labels:
         severity: critical
         service: elasticsearch
@@ -31,6 +25,19 @@ server:
         summary: 'Elasticsearch cluster status is RED'
         description: >-
           The Elasticsearch cluster status is RED for the last 5 minutes.
+{%- endraw %}
+{%- endif %}
+{%- if server.get('enabled', False) %}
+{%- raw %}
+    ElasticsearchDown:
+      if: >-
+        elasticsearch_up{host=~'.*'} != 1
+      labels:
+        severity: warning
+        service: elasticsearch
+      annotations:
+        summary: 'Elasticsearch service down'
+        description: 'Elasticsearch service is down on node {{ $labels.host }}'
     ElasticsearchClusterDiskLowWaterMark:
       if: >-
         (max(elasticsearch_fs_total_total_in_bytes) by (host, instance) - max(elasticsearch_fs_total_available_in_bytes) by (host, instance)) / max(elasticsearch_fs_total_total_in_bytes) by (host, instance) * 100.0 >= 85
@@ -54,5 +61,6 @@ server:
       labels:
         severity: critical
         service: elasticsearch
-{% endraw %}
+{%- endraw %}
+{%- endif %}
 {%- endif %}
diff --git a/elasticsearch/meta/telegraf.yml b/elasticsearch/meta/telegraf.yml
index 3568601..6ccac3f 100644
--- a/elasticsearch/meta/telegraf.yml
+++ b/elasticsearch/meta/telegraf.yml
@@ -1,13 +1,31 @@
-{%- from "elasticsearch/map.jinja" import server with context %}
+{%- if pillar.elasticsearch.server is defined or pillar.elasticsearch.client is defined %}
+{%- from "elasticsearch/map.jinja" import server, client with context %}
+
 {%- if server.get('enabled', False) %}
-{%- set address = server.get('bind', {}).get('address', '127.0.0.1') %}
-{%- set port = server.get('bind', {}).get('port', 9200) %}
-{%- set servers = ['http://{}:{}'.format(address, port)] %}
+{%- set bind = server.get('bind', {}) %}
+{# The local agent gathers the node's metrics + cluster stats if the node is master #}
 agent:
   input:
     elasticsearch:
-      servers: {{ servers|yaml }}
+      template: elasticsearch/files/telegraf.conf
+      servers:
+        - "http://{{ bind.address|default('127.0.0.1') }}:{{ bind.port|default(9200) }}"
+      cluster_health: false
+      cluster_stats: true
       local: true
+{%- endif %}
+
+{%- if client.get('enabled', False) %}
+{# The remote agent gathers only the cluster health metrics #}
+remote_agent:
+  input:
+    elasticsearch:
+      template: elasticsearch/files/telegraf.conf
+      servers:
+        - "http://{{ client.server.host }}:{{ client.server.get('port', 9200) }}"
       cluster_health: true
-      cluster_stats: true
+      cluster_stats: false
+      local: false
+      namepass: [ 'elasticsearch_cluster_health*' ]
+{%- endif %}
 {%- endif %}