Merge "Add prometheus main dashboard"
diff --git a/nova/compute.sls b/nova/compute.sls
index 641305e..07fdfe2 100644
--- a/nova/compute.sls
+++ b/nova/compute.sls
@@ -186,24 +186,68 @@
pkg.installed:
- name: ceph-common
+{%- if compute.ceph.cinder_secret_uuid is defined and compute.ceph.cinder_volumes_key is defined %}
+
+{%- set cinder_volumes_key = salt['grains.get']("ceph:ceph_keyring:"+compute.ceph.cinder_volumes_key+":key", '') %}
+
+{%- if cinder_volumes_key != '' %}
+
+/etc/secret_cinder.xml:
+ file.managed:
+ - source: salt://nova/files/secret_cinder.xml
+ - template: jinja
+
+ceph_virsh_secret_define_cinder:
+ cmd.run:
+ - name: "virsh secret-define --file /etc/secret_cinder.xml"
+ - unless: "virsh secret-list | grep {{ compute.ceph.cinder_secret_uuid }}"
+ - require:
+ - file: /etc/secret_cinder.xml
+
+ceph_virsh_secret_set_value_cinder:
+ cmd.run:
+ - name: "virsh secret-set-value --secret {{ compute.ceph.cinder_secret_uuid }} --base64 {{ cinder_volumes_key }} "
+ - unless: "virsh secret-get-value {{ compute.ceph.cinder_secret_uuid }} | grep {{ cinder_volumes_key }}"
+ - require:
+ - cmd: ceph_virsh_secret_define_cinder
+
+{% endif %}
+
+{% endif %}
+
/etc/secret.xml:
file.managed:
- source: salt://nova/files/secret.xml
- template: jinja
-ceph_virsh_secret_define:
+ceph_virsh_secret_define_nova:
cmd.run:
- name: "virsh secret-define --file /etc/secret.xml"
- unless: "virsh secret-list | grep {{ compute.ceph.secret_uuid }}"
- require:
- file: /etc/secret.xml
-ceph_virsh_secret_set_value:
+{%- set client_cinder_key = salt['grains.get']("ceph:ceph_keyring:"+compute.ceph.client_cinder_key+":key", '') %}
+
+{%- if client_cinder_key != '' %}
+
+ceph_virsh_secret_set_value_nova:
+ cmd.run:
+ - name: "virsh secret-set-value --secret {{ compute.ceph.secret_uuid }} --base64 {{ client_cinder_key }} "
+ - unless: "virsh secret-get-value {{ compute.ceph.secret_uuid }} | grep {{ client_cinder_key }}"
+ - require:
+ - cmd: ceph_virsh_secret_define_nova
+
+{% else %}
+
+ceph_virsh_secret_set_value_nova:
cmd.run:
- name: "virsh secret-set-value --secret {{ compute.ceph.secret_uuid }} --base64 {{ compute.ceph.client_cinder_key }} "
- unless: "virsh secret-get-value {{ compute.ceph.secret_uuid }} | grep {{ compute.ceph.client_cinder_key }}"
- require:
- - cmd: ceph_virsh_secret_define
+ - cmd: ceph_virsh_secret_define_nova
+
+{% endif %}
{% endif %}
diff --git a/nova/files/grafana_dashboards/nova_prometheus.json b/nova/files/grafana_dashboards/nova_prometheus.json
index a4e07d2..2e6a249 100644
--- a/nova/files/grafana_dashboards/nova_prometheus.json
+++ b/nova/files/grafana_dashboards/nova_prometheus.json
@@ -2499,6 +2499,182 @@
"show": true
}
]
+ },
+ {
+ "cacheTimeout": null,
+ "colorBackground": false,
+ "colorValue": false,
+ "colors": [
+ "rgba(245, 54, 54, 0.9)",
+ "rgba(237, 129, 40, 0.89)",
+ "rgba(50, 172, 45, 0.97)"
+ ],
+ "datasource": "prometheus",
+ "decimals": null,
+ "format": "s",
+ "gauge": {
+ "maxValue": 100,
+ "minValue": 0,
+ "show": false,
+ "thresholdLabels": false,
+ "thresholdMarkers": true
+ },
+ "id": 52,
+ "interval": null,
+ "links": [],
+ "mappingType": 1,
+ "mappingTypes": [
+ {
+ "name": "value to text",
+ "value": 1
+ },
+ {
+ "name": "range to text",
+ "value": 2
+ }
+ ],
+ "maxDataPoints": 100,
+ "nullPointMode": "connected",
+ "nullText": null,
+ "postfix": "",
+ "postfixFontSize": "50%",
+ "prefix": "",
+ "prefixFontSize": "50%",
+ "rangeMaps": [
+ {
+ "from": "null",
+ "text": "N/A",
+ "to": "null"
+ }
+ ],
+ "span": 2,
+ "sparkline": {
+ "fillColor": "rgba(31, 118, 189, 0.18)",
+ "full": false,
+ "lineColor": "rgb(31, 120, 193)",
+ "show": false
+ },
+ "tableColumn": "",
+ "targets": [
+ {
+ "expr": "avg(openstack_nova_instance_creation_time)",
+ "format": "time_series",
+ "hide": false,
+ "intervalFactor": 2,
+ "legendFormat": "",
+ "metric": "",
+ "refId": "A",
+ "step": 60
+ }
+ ],
+ "thresholds": "",
+ "title": "Instance creation time (average)",
+ "type": "singlestat",
+ "valueFontSize": "80%",
+ "valueMaps": [
+ {
+ "op": "=",
+ "text": "N/A",
+ "value": "null"
+ }
+ ],
+ "valueName": "min"
+ },
+ {
+ "aliasColors": {
+ "Avg": "#6ED0E0"
+ },
+ "bars": false,
+ "dashLength": 10,
+ "dashes": false,
+ "datasource": "prometheus",
+ "fill": 1,
+ "id": 53,
+ "legend": {
+ "avg": false,
+ "current": false,
+ "hideEmpty": false,
+ "hideZero": false,
+ "max": false,
+ "min": false,
+ "show": true,
+ "total": false,
+ "values": false
+ },
+ "lines": true,
+ "linewidth": 1,
+ "links": [],
+ "nullPointMode": "null as zero",
+ "percentage": false,
+ "pointradius": 5,
+ "points": false,
+ "renderer": "flot",
+ "seriesOverrides": [],
+ "spaceLength": 10,
+ "span": 10,
+ "stack": false,
+ "steppedLine": false,
+ "targets": [
+ {
+ "expr": "avg(openstack_nova_instance_creation_time)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Avg",
+ "refId": "A",
+ "step": 4
+ },
+ {
+ "expr": "min(openstack_nova_instance_creation_time)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Min",
+ "refId": "B",
+ "step": 4
+ },
+ {
+ "expr": "max(openstack_nova_instance_creation_time)",
+ "format": "time_series",
+ "intervalFactor": 2,
+ "legendFormat": "Max",
+ "refId": "C",
+ "step": 4
+ }
+ ],
+ "thresholds": [],
+ "timeFrom": null,
+ "timeShift": null,
+ "title": "Instances",
+ "tooltip": {
+ "shared": true,
+ "sort": 0,
+ "value_type": "individual"
+ },
+ "type": "graph",
+ "xaxis": {
+ "buckets": null,
+ "mode": "time",
+ "name": null,
+ "show": true,
+ "values": []
+ },
+ "yaxes": [
+ {
+ "format": "s",
+ "label": "Creation time",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ },
+ {
+ "format": "short",
+ "label": "",
+ "logBase": 1,
+ "max": null,
+ "min": null,
+ "show": true
+ }
+ ]
}
],
"repeat": null,
diff --git a/nova/files/mitaka/nova-compute.conf.Debian b/nova/files/mitaka/nova-compute.conf.Debian
index bdd636c..232b569 100644
--- a/nova/files/mitaka/nova-compute.conf.Debian
+++ b/nova/files/mitaka/nova-compute.conf.Debian
@@ -122,6 +122,9 @@
libvirt_inject_password=True
block_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE,VIR_MIGRATE_PEER2PEER,VIR_MIGRATE_LIVE,VIR_MIGRATE_NON_SHARED_INC
live_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE,VIR_MIGRATE_PEER2PEER,VIR_MIGRATE_LIVE,VIR_MIGRATE_PERSIST_DEST
+{%- if compute.libvirt.migration_inbound_addr is defined %}
+live_migration_inbound_addr = {{ compute.libvirt.migration_inbound_addr }}
+{%- endif %}
libvirt_inject_key=True
inject_key=False
vif_driver=nova.virt.libvirt.vif.LibvirtGenericVIFDriver
diff --git a/nova/files/newton/nova-compute.conf.Debian b/nova/files/newton/nova-compute.conf.Debian
index fb6fc85..7f950c1 100644
--- a/nova/files/newton/nova-compute.conf.Debian
+++ b/nova/files/newton/nova-compute.conf.Debian
@@ -165,6 +165,9 @@
libvirt_inject_password=True
block_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE,VIR_MIGRATE_PEER2PEER,VIR_MIGRATE_LIVE,VIR_MIGRATE_NON_SHARED_INC
live_migration_flag=VIR_MIGRATE_UNDEFINE_SOURCE,VIR_MIGRATE_PEER2PEER,VIR_MIGRATE_LIVE,VIR_MIGRATE_PERSIST_DEST
+{%- if compute.libvirt.migration_inbound_addr is defined %}
+live_migration_inbound_addr = {{ compute.libvirt.migration_inbound_addr }}
+{%- endif %}
libvirt_inject_key=True
inject_key=False
vif_driver=nova.virt.libvirt.vif.LibvirtGenericVIFDriver
diff --git a/nova/files/ocata/nova-compute.conf.Debian b/nova/files/ocata/nova-compute.conf.Debian
index 2e2d276..f7db4c2 100644
--- a/nova/files/ocata/nova-compute.conf.Debian
+++ b/nova/files/ocata/nova-compute.conf.Debian
@@ -6147,6 +6147,9 @@
# * A valid IP address or hostname, else None.
# (string value)
#live_migration_inbound_addr=<None>
+{%- if compute.libvirt.migration_inbound_addr is defined %}
+live_migration_inbound_addr = {{ compute.libvirt.migration_inbound_addr }}
+{%- endif %}
# DEPRECATED:
# Live migration target URI to use.
diff --git a/nova/files/secret.xml b/nova/files/secret.xml
index 19e55c8..aacd09e 100644
--- a/nova/files/secret.xml
+++ b/nova/files/secret.xml
@@ -4,4 +4,4 @@
<usage type='ceph'>
<name>client.{{ compute.ceph.get('rbd_user', 'cinder') }} secret</name>
</usage>
-</secret>
+</secret>
\ No newline at end of file
diff --git a/nova/files/secret_cinder.xml b/nova/files/secret_cinder.xml
new file mode 100644
index 0000000..01e4dda
--- /dev/null
+++ b/nova/files/secret_cinder.xml
@@ -0,0 +1,4 @@
+{%- from "nova/map.jinja" import compute with context %}
+<secret ephemeral='no' private='no'>
+ <uuid>{{ compute.ceph.cinder_secret_uuid }}</uuid>
+</secret>
diff --git a/nova/map.jinja b/nova/map.jinja
index f725112..d8610f6 100644
--- a/nova/map.jinja
+++ b/nova/map.jinja
@@ -114,5 +114,9 @@
'error_log_rate': {
'warn': 0.2,
},
+ 'services_failed_warning_threshold_percent': 0.3,
+ 'services_failed_critical_threshold_percent': 0.6,
+ 'computes_failed_warning_threshold_percent': 0.25,
+ 'computes_failed_critical_threshold_percent': 0.5,
},
}, grain='os_family', merge=salt['pillar.get']('nova:monitoring')) %}
diff --git a/nova/meta/prometheus.yml b/nova/meta/prometheus.yml
index 71d6ac9..8599418 100644
--- a/nova/meta/prometheus.yml
+++ b/nova/meta/prometheus.yml
@@ -41,30 +41,41 @@
annotations:
summary: "Endpoint check for '{{ $labels.service }}' is down"
description: >-
- Endpoint check for '{{ $labels.service }}' is down for 2 minutes
- NovaSomeServicesDown:
+ Endpoint check for '{{ $labels.service }}' is down for the last 2 minutes
+ NovaAPIServiceDown:
if: >-
- openstack_nova_services{state="down",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} > 0 and ignoring(state) openstack_nova_services{state="up",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} >= 2
+ http_response_status{service=~"nova-api"} == 0
+ for: 2m
+ labels:
+ severity: down
+ service: "{{ $labels.service }}"
+ annotations:
+ summary: "HTTP check for '{{ $labels.service }}' down"
+ description: >-
+ The HTTP check for '{{ $labels.service }}' is down on {{ $labels.host }} for the last 2 minutes.
+ NovaServicesWarning:
+ if: >-
+ openstack_nova_services{state="down",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} >= on (service) sum(openstack_nova_services{service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"}) by (service) * {%- endraw %} {{monitoring.services_failed_warning_threshold_percent}} {%- raw %}
for: 2m
labels:
severity: warning
service: "{{ $labels.service }}"
annotations:
- summary: "Some {{ $labels.service }} services down"
+ summary: "More than {%- endraw %} {{ '%g'|format(monitoring.services_failed_warning_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down"
description: >-
- {{ $value }} '{{ $labels.service }}' service(s) is/are down for 2 minutes
- NovaOnlyOneServiceUp:
+ More than {%- endraw %} {{ '%g'|format(monitoring.services_failed_warning_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down for the last 2 minutes
+ NovaServicesCritical:
if: >-
- openstack_nova_services{state="up",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} == 1 and ignoring(state) openstack_nova_services{state=~"down|disabled",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} > 0
+ openstack_nova_services{state="down",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} >= on (service) sum(openstack_nova_services{service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"}) by (service) * {%- endraw %} {{monitoring.services_failed_critical_threshold_percent}} {%- raw %}
for: 2m
labels:
severity: critical
service: "{{ $labels.service }}"
annotations:
- summary: "Only one {{ $labels.service }} service up"
+ summary: "More than {%- endraw %} {{ '%g'|format(monitoring.services_failed_critical_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down"
description: >-
- Only one '{{ $labels.service }}' service is up for 2 minutes
- NovaAllServicesDown:
+ More than {%- endraw %} {{ '%g'|format(monitoring.services_failed_critical_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down for the last 2 minutes
+ NovaServicesDown:
if: >-
openstack_nova_services{state="up",service=~"nova-cert|nova-conductor|nova-consoleauth|nova-scheduler"} == 0
for: 2m
@@ -74,30 +85,30 @@
annotations:
summary: "All {{ $labels.service }} services down"
description: >-
- All '{{ $labels.service }}' services are down for 2 minutes
- NovaSomeComputesDown:
+ All '{{ $labels.service }}' services are down for the last 2 minutes
+ NovaComputesWarning:
if: >-
- openstack_nova_services{state="down",service=~"nova-compute"} > 0
+ openstack_nova_services{state="down",service=~"nova-compute"} >= on (service) sum(openstack_nova_services{service=~"nova-compute"}) by (service) * {%- endraw %} {{monitoring.computes_failed_warning_threshold_percent}} {%- raw %}
for: 2m
labels:
severity: warning
service: "{{ $labels.service }}"
annotations:
- summary: "Some {{ $labels.service }} services down"
+ summary: "More than {%- endraw %} {{ '%g'|format(monitoring.computes_failed_warning_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down"
description: >-
- {{ $value }} '{{ $labels.service }}' service(s) is/are down for 2 minutes
- NovaMajorityComputesDown:
+ More than {%- endraw %} {{ '%g'|format(monitoring.computes_failed_warning_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down for the last 2 minutes
+ NovaComputesCritical:
if: >-
- openstack_nova_services_percent{state="down",service=~"nova-compute"} > 50
+ openstack_nova_services{state="down",service=~"nova-compute"} >= on (service) sum(openstack_nova_services{service=~"nova-compute"}) by (service) * {%- endraw %} {{monitoring.computes_failed_critical_threshold_percent}} {%- raw %}
for: 2m
labels:
severity: critical
service: "{{ $labels.service }}"
annotations:
- summary: "Only one {{ $labels.service }} service up"
+ summary: "More than {%- endraw %} {{ '%g'|format(monitoring.computes_failed_critical_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down"
description: >-
- Only one '{{ $labels.service }}' service is up for 2 minutes
- NovaAllComputesDown:
+ More than {%- endraw %} {{ '%g'|format(monitoring.computes_failed_critical_threshold_percent*100) }}%{%- raw %} of {{ $labels.service }} services are down for the last 2 minutes
+ NovaComputesDown:
if: >-
openstack_nova_services{state="up",service=~"nova-compute"} == 0
for: 2m
@@ -105,9 +116,9 @@
severity: down
service: "{{ $labels.service }}"
annotations:
- summary: "All {{ $labels.service }} services down"
+ summary: "All {{ $labels.service }} services are down"
description: >-
- All '{{ $labels.service }}' services are down for 2 minutes
+ All '{{ $labels.service }}' services are down for the last 2 minutes
NovaTotalFreeVCPUsLow:
if: >-
(100.0 * openstack_nova_total_free_vcpus) / (openstack_nova_total_free_vcpus + openstack_nova_total_used_vcpus) < 10.0