Monitor Keystone keys (Fernet/Credentials) rotation
- suppress the SSH banner when rsync runs over SSH (ssh -q)
- collect all lines from keystone-rotate.log with Fluentd
- add a Prometheus alert based on the log_messages metric
Change-Id: I3eebedd8ff3bd03eca0f6221e9dbfd801aae989b
Related-bug: PROD-35477
diff --git a/keystone/files/keystone_keys_rotate.sh b/keystone/files/keystone_keys_rotate.sh
index ccec5ff..8cf9fc1 100644
--- a/keystone/files/keystone_keys_rotate.sh
+++ b/keystone/files/keystone_keys_rotate.sh
@@ -87,8 +87,8 @@
run_rsync () {
local sync_dir=$1
local sync_node=$2
- rsync -e 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' -avz --exclude 0 ${sync_dir} keystone@${sync_node}:${sync_dir}
- rsync -e 'ssh -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' -avz --delete-after ${sync_dir} keystone@${sync_node}:${sync_dir}
+ rsync -e 'ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' -avz --exclude 0 ${sync_dir} keystone@${sync_node}:${sync_dir}
+ rsync -e 'ssh -q -o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no' -avz --delete-after ${sync_dir} keystone@${sync_node}:${sync_dir}
}
run_keystone () {
diff --git a/keystone/meta/fluentd.yml b/keystone/meta/fluentd.yml
index e61ef44..6bc613a 100644
--- a/keystone/meta/fluentd.yml
+++ b/keystone/meta/fluentd.yml
@@ -15,6 +15,49 @@
tag: openstack.keystone
type: relabel
label: openstack_keystone
+ openstack_keystone_keys_rotation:
+ input:
+ keystone_rotate_in_tail:
+ type: tail
+ path: /var/log/keystone/keystone-rotate.log
+ tag: openstack.keystone
+ pos_file: {{ positiondb }}/keystone.keys.pos
+ parser:
+ type: multiline
+ time_key: Timestamp
+ time_format: '%d_%m_%Y-%H:%M'
+ keep_time_key: false
+ # Log format https://regex101.com/r/mYRLVu/1
+ format_firstline: '/^Script started at: (?<Timestamp>.*)/'
+ format: '/^(?<Payload>.*)/'
+ filter:
+ add_keystone_keys_record_fields:
+ tag: openstack.keystone
+ type: record_transformer
+ enable_ruby: true
+ record:
+ - name: Severity
+ value: '${ if record["Payload"].include? "error" then 3.to_s else 6.to_s end }'
+ - name: severity_label
+ value: '${ if record["Payload"].include? "error" then "ERROR" else "INFO" end }'
+ - name: programname
+ value: keystone-keys-rotation
+ match:
+ send_to_default:
+ tag: openstack.keystone
+ type: copy
+ store:
+ - type: relabel
+ label: default_output
+ - type: rewrite_tag_filter
+ rule:
+ - name: severity_label
+ regexp: '.'
+ result: metric.keystone_log_messages
+ push_to_metric:
+ tag: 'metric.**'
+ type: relabel
+ label: default_metric
{%- if server.service_name in ['apache2', 'httpd'] %}
openstack_keystone_wsgi:
input:
@@ -136,7 +179,7 @@
desc: Total number of log lines by severity
label:
- name: service
- value: keystone
+ value: ${programname}
- name: level
value: ${severity_label}
- name: host
@@ -160,4 +203,4 @@
value: keystone
- name: host
value: ${Hostname}
-{% endif %}
\ No newline at end of file
+{% endif %}
diff --git a/keystone/meta/heka.yml b/keystone/meta/heka.yml
index 84ed391..6bcea1e 100644
--- a/keystone/meta/heka.yml
+++ b/keystone/meta/heka.yml
@@ -44,7 +44,7 @@
rules:
- metric: log_messages
field:
- service: keystone
+ service: '== keystone || == keystone-wsgi'
level: error
relational_operator: '>'
threshold: 0.1
diff --git a/keystone/meta/prometheus.yml b/keystone/meta/prometheus.yml
index 97ff533..18c487f 100644
--- a/keystone/meta/prometheus.yml
+++ b/keystone/meta/prometheus.yml
@@ -70,7 +70,7 @@
{%- endraw %}
{%- set log_threshold = monitoring.error_log_rate|float %}
if: >-
- sum(rate(log_messages{service="keystone",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }}
+ sum(rate(log_messages{service=~"keystone|keystone-wsgi",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }}
{%- raw %}
labels:
severity: warning
@@ -91,6 +91,17 @@
annotations:
summary: "High response time of Keystone API"
description: "The Keystone API response time for GET and POST requests on the {{ $labels.host }} node is higher than {% endraw %}{{response_time_threshold}}s for 2 minutes."
+ {% raw %}
+ KeystoneKeysRotationFailure:
+ if: >-
+ increase(log_messages{service="keystone-keys-rotation",level="ERROR"}[2h]) > 0
+ labels:
+ severity: major
+ service: keystone
+ annotations:
+ summary: "Keystone keys rotation failure"
+ description: "Keystone user failed to rotate Fernet or Credential keys across control nodes. Check /var/log/keystone/keystone-rotate.log on the {{ $labels.host }} node for details."
+{%- endraw %}
{%- endif %}
{%- set range_duration = monitoring.api_monitoring_duration %}
recording: