Merge "Update .travis.yml and .kitchen.yml files for parallel testing"
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index ac7d3ba..fce5030 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -4,8 +4,12 @@
collectd:
enabled: false
heka:
- enabled: false
+ enabled: true
sensu:
enabled: false
sphinx:
enabled: false
+ prometheus:
+ enabled: true
+ telegraf:
+ enabled: true
diff --git a/octavia/map.jinja b/octavia/map.jinja
index 494c18d..3280dc4 100644
--- a/octavia/map.jinja
+++ b/octavia/map.jinja
@@ -47,3 +47,9 @@
}
},
}, merge=pillar.octavia.get('manager', {})) %}
+
+{% set monitoring = salt['grains.filter_by']({
+ 'default': {
+ 'error_log_rate': 0.2,
+ },
+}, merge=pillar.octavia.get('monitoring', {})) %}
diff --git a/octavia/meta/heka.yml b/octavia/meta/heka.yml
new file mode 100644
index 0000000..61b35b5
--- /dev/null
+++ b/octavia/meta/heka.yml
@@ -0,0 +1,20 @@
+log_collector:
+ decoder:
+ octavia:
+ engine: sandbox
+ module_file: /usr/share/lma_collector/decoders/openstack_log.lua
+ module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
+ adjust_timezone: true
+ splitter:
+ octavia:
+ engine: token
+ delimiter: '\n'
+ input:
+ octavia_log:
+ engine: logstreamer
+ log_directory: "/var/log"
+ file_match: 'octavia/(?P<Service>(api|health-manager|housekeeping|worker))\.log\.?(?P<Seq>\d*)$'
+ differentiator: ['octavia', '_', 'Service']
+ priority: ["^Seq"]
+ decoder: "octavia_decoder"
+ splitter: "octavia_splitter"
diff --git a/octavia/meta/prometheus.yml b/octavia/meta/prometheus.yml
new file mode 100644
index 0000000..dcde1d1
--- /dev/null
+++ b/octavia/meta/prometheus.yml
@@ -0,0 +1,33 @@
+{%- from "octavia/map.jinja" import api, monitoring with context %}
+{#- Prometheus alert rules for Octavia; rendered only when the API role is enabled. #}
+
+{%- if api.get('enabled', False) %}
+server:
+ alert:
+{#- Aggregate by (service): a bare max() drops every label and would leave the $labels.service references below empty. #}
+{%- raw %}
+ OctaviaAPIDown:
+ if: >-
+ max(openstack_api_check_status{service="octavia-api"}) by (service) == 0
+ for: 2m
+ labels:
+ severity: down
+ service: "{{ $labels.service }}"
+ annotations:
+ summary: "Endpoint check for '{{ $labels.service}}' is down"
+ description: >-
+ Endpoint check for '{{ $labels.service}}' is down for 2 minutes
+{%- endraw %}
+{#- The threshold is monitoring.error_log_rate from map.jinja (default there, overridable via the octavia monitoring pillar). #}
+ OctaviaErrorLogsTooHigh:
+ {%- set log_threshold = monitoring.error_log_rate|float %}
+ if: >-
+ sum(rate(log_messages{service="octavia",level=~"error|emergency|fatal"}[5m])) without (level) > {{ log_threshold }}
+{%- raw %}
+ labels:
+ severity: warning
+ service: "{{ $labels.service }}"
+ annotations:
+ summary: 'Too many errors in {{ $labels.service }} logs'
+ description: 'The rate of errors in {{ $labels.service }} logs over the last 5 minutes is too high on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ log_threshold }}).'
+{%- endif %}
diff --git a/octavia/meta/telegraf.yml b/octavia/meta/telegraf.yml
new file mode 100644
index 0000000..2af7d05
--- /dev/null
+++ b/octavia/meta/telegraf.yml
@@ -0,0 +1,9 @@
+{%- from "octavia/map.jinja" import api with context %}
+{%- if api.get('enabled', False) %}
+agent:
+ input:
+ http_response:
+ octavia-api:
+ address: "http://{{ api.bind.address|replace('0.0.0.0', '127.0.0.1') }}:{{ api.bind.port }}/"
+ expected_code: 200
+{%- endif %}