Enable log monitoring
Change-Id: Id8fd43b342fefcca42e2acfff64bb9faf46203dd
diff --git a/metadata/service/support.yml b/metadata/service/support.yml
index ac7d3ba..ea701e1 100644
--- a/metadata/service/support.yml
+++ b/metadata/service/support.yml
@@ -4,8 +4,10 @@
collectd:
enabled: false
heka:
- enabled: false
+ enabled: true
sensu:
enabled: false
sphinx:
enabled: false
+ prometheus:
+ enabled: true
diff --git a/octavia/map.jinja b/octavia/map.jinja
index 494c18d..3280dc4 100644
--- a/octavia/map.jinja
+++ b/octavia/map.jinja
@@ -47,3 +47,9 @@
}
},
}, merge=pillar.octavia.get('manager', {})) %}
+{# Monitoring defaults; the octavia:monitoring pillar dict is merged over them. error_log_rate is read as the alert threshold in octavia/meta/prometheus.yml. #}
+{% set monitoring = salt['grains.filter_by']({
+    'default': {
+        'error_log_rate': 0.2,
+    },
+}, merge=pillar.octavia.get('monitoring', {})) %}
diff --git a/octavia/meta/heka.yml b/octavia/meta/heka.yml
new file mode 100644
index 0000000..61b35b5
--- /dev/null
+++ b/octavia/meta/heka.yml
@@ -0,0 +1,27 @@
+# Heka log collection for Octavia: read the service log files under
+# /var/log/octavia, split them with the token splitter and decode each
+# record with the openstack_log.lua sandbox decoder.
+log_collector:
+  decoder:
+    octavia:
+      engine: sandbox
+      module_file: /usr/share/lma_collector/decoders/openstack_log.lua
+      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
+      adjust_timezone: true
+  splitter:
+    octavia:
+      engine: token
+      delimiter: '\n'
+  input:
+    octavia_log:
+      engine: logstreamer
+      log_directory: "/var/log"
+      # Selects the api, health-manager, housekeeping and worker logs; an
+      # optional numeric suffix is captured as Seq and used by "priority".
+      file_match: 'octavia/(?P<Service>(api|health-manager|housekeeping|worker))\.log\.?(?P<Seq>\d*)$'
+      differentiator: ['octavia', '_', 'Service']
+      priority: ["^Seq"]
+      # NOTE(review): presumably these name the "octavia" decoder and splitter
+      # entries above -- confirm the "<name>_decoder"/"<name>_splitter" rule.
+      decoder: "octavia_decoder"
+      splitter: "octavia_splitter"
diff --git a/octavia/meta/prometheus.yml b/octavia/meta/prometheus.yml
new file mode 100644
index 0000000..b75fd3a
--- /dev/null
+++ b/octavia/meta/prometheus.yml
@@ -0,0 +1,20 @@
+{#- Prometheus alert on the rate of Octavia error-level log messages; rendered only when api.enabled is truthy. #}
+{%- from "octavia/map.jinja" import api, monitoring with context %}
+
+{%- if api.get('enabled', False) %}
+server:
+  alert:
+    OctaviaErrorLogsTooHigh:
+      {#- The threshold is monitoring.error_log_rate coerced to float; it is reused in the description below. #}
+      {%- set log_threshold = monitoring.error_log_rate|float %}
+      if: >-
+        sum(rate(log_messages{service="octavia",level=~"error|emergency|fatal"}[5m])) without (level) > {{ log_threshold }}
+{#- The raw section below stops Jinja from evaluating the $labels / $value placeholders, so they reach the rendered rule verbatim. #}
+{%- raw %}
+      labels:
+        severity: warning
+        service: "{{ $labels.service }}"
+      annotations:
+        summary: 'Too many errors in {{ $labels.service }} logs'
+        description: 'The rate of errors in {{ $labels.service }} logs over the last 5 minutes is too high on node {{ $labels.host }} (current value={{ $value }}, threshold={%- endraw %}{{ log_threshold }}).'
+{%- endif %}