Merge "Added the ability to configuring white list of PCI devices"
diff --git a/README.rst b/README.rst
index e57665b..0184a83 100644
--- a/README.rst
+++ b/README.rst
@@ -32,6 +32,7 @@
dhcp_domain: novalocal
vif_plugging_timeout: 300
vif_plugging_is_fatal: false
+ instance_build_timeout: 600
consoleauth:
token_ttl: 600
bind:
@@ -1219,6 +1220,34 @@
connection_debug: 10
pool_timeout: 120
+
+Configure nova to use service user tokens:
+==========================================
+Long-running operations such as live migration or snapshot can sometimes overrun the
+expiry of the user token. In such cases, post operations such as cleaning up after a
+live migration can fail when the nova-compute service needs to clean up resources in
+other services, such as in the block-storage (cinder) or networking (neutron) services.
+
+This option enables nova to use service user tokens to supplement the regular user token
+used to initiate the operation. The identity service (keystone) will then authenticate
+a request using the service user token if the user token has already expired.
+
+.. code-block:: yaml
+
+ nova:
+ controller:
+ enabled: True
+ ...
+ service_user:
+ enabled: True
+ user_domain_id: default
+ project_domain_id: default
+ project_name: service
+ username: nova
+ password: pswd
+
+
+
Upgrades
========
diff --git a/nova/files/ocata/nova-compute.conf.Debian b/nova/files/ocata/nova-compute.conf.Debian
index 3fbf86a..13be77c 100644
--- a/nova/files/ocata/nova-compute.conf.Debian
+++ b/nova/files/ocata/nova-compute.conf.Debian
@@ -9053,6 +9053,24 @@
#
# From nova.conf
#
+{%- if compute.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+auth_type = password
+ {%- if compute.service_user is defined %}
+ {%- set _data=compute.service_user %}
+ {%- else %}
+ {%- set _data=compute.identity %}
+ {%- endif %}
+user_domain_id = {{ _data.get('domain', 'default') }}
+project_domain_id = {{ _data.get('domain', 'default') }}
+project_name = {{ _data.get('tenant', 'service') }}
+username = {{ _data.get('user', 'nova') }}
+password = {{ _data.password }}
+auth_url={{ compute.identity.get('protocol', 'http') }}://{{ compute.identity.host }}:5000
+ {%- if compute.identity.get('protocol', 'http') == 'https' %}
+cafile={{ compute.identity.get('cacert_file', compute.cacert_file) }}
+ {%- endif %}
+{%- endif %}
#
# When True, if sending a user token to an REST API, also send a service token.
diff --git a/nova/files/ocata/nova-controller.conf.Debian b/nova/files/ocata/nova-controller.conf.Debian
index 440a2cd..cfb4a1d 100644
--- a/nova/files/ocata/nova-controller.conf.Debian
+++ b/nova/files/ocata/nova-controller.conf.Debian
@@ -9030,6 +9030,24 @@
#
# From nova.conf
#
+{%- if controller.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+auth_type = password
+ {%- if controller.service_user is defined %}
+ {%- set _data=controller.service_user %}
+ {%- else %}
+ {%- set _data=controller.identity %}
+ {%- endif %}
+user_domain_id = {{ _data.get('domain', 'default') }}
+project_domain_id = {{ _data.get('domain', 'default') }}
+project_name = {{ _data.get('tenant', 'service') }}
+username = {{ _data.get('user', 'nova') }}
+password = {{ _data.password }}
+auth_url={{ controller.identity.get('protocol', 'http') }}://{{ controller.identity.host }}:5000
+ {%- if controller.identity.get('protocol', 'http') == 'https' %}
+cafile={{ controller.identity.get('cacert_file', controller.cacert_file) }}
+ {%- endif %}
+{%- endif %}
#
# When True, if sending a user token to an REST API, also send a service token.
diff --git a/nova/files/pike/nova-compute.conf.Debian b/nova/files/pike/nova-compute.conf.Debian
index 9bbd313..1dbc3d3 100644
--- a/nova/files/pike/nova-compute.conf.Debian
+++ b/nova/files/pike/nova-compute.conf.Debian
@@ -1028,7 +1028,11 @@
# * Any positive integer in seconds: Enables the option.
# (integer value)
# Minimum value: 0
-#instance_build_timeout=0
+{%- if compute.instance_build_timeout is defined %}
+instance_build_timeout = {{ compute.instance_build_timeout }}
+{%- else %}
+#instance_build_timeout = 0
+{%- endif %}
#
# Interval to wait before un-rescuing an instance stuck in RESCUE.
@@ -9252,6 +9256,24 @@
#
# From nova.conf
#
+{%- if compute.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+auth_type = password
+ {%- if compute.service_user is defined %}
+ {%- set _data=compute.service_user %}
+ {%- else %}
+ {%- set _data=compute.identity %}
+ {%- endif %}
+user_domain_id = {{ _data.get('domain', 'default') }}
+project_domain_id = {{ _data.get('domain', 'default') }}
+project_name = {{ _data.get('tenant', 'service') }}
+username = {{ _data.get('user', 'nova') }}
+password = {{ _data.password }}
+auth_url={{ compute.identity.get('protocol', 'http') }}://{{ compute.identity.host }}:5000
+ {%- if compute.identity.get('protocol', 'http') == 'https' %}
+cafile={{ compute.identity.get('cacert_file', compute.cacert_file) }}
+ {%- endif %}
+{%- endif %}
#
# When True, if sending a user token to an REST API, also send a service token.
diff --git a/nova/files/pike/nova-controller.conf.Debian b/nova/files/pike/nova-controller.conf.Debian
index 01c05d6..3df6b0d 100644
--- a/nova/files/pike/nova-controller.conf.Debian
+++ b/nova/files/pike/nova-controller.conf.Debian
@@ -1008,7 +1008,11 @@
# * Any positive integer in seconds: Enables the option.
# (integer value)
# Minimum value: 0
-#instance_build_timeout=0
+{%- if controller.instance_build_timeout is defined %}
+instance_build_timeout = {{ controller.instance_build_timeout }}
+{%- else %}
+#instance_build_timeout = 0
+{%- endif %}
#
# Interval to wait before un-rescuing an instance stuck in RESCUE.
@@ -9240,6 +9244,24 @@
#
# From nova.conf
#
+{%- if controller.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+auth_type = password
+ {%- if controller.service_user is defined %}
+ {%- set _data=controller.service_user %}
+ {%- else %}
+ {%- set _data=controller.identity %}
+ {%- endif %}
+user_domain_id = {{ _data.get('domain', 'default') }}
+project_domain_id = {{ _data.get('domain', 'default') }}
+project_name = {{ _data.get('tenant', 'service') }}
+username = {{ _data.get('user', 'nova') }}
+password = {{ _data.password }}
+auth_url={{ controller.identity.get('protocol', 'http') }}://{{ controller.identity.host }}:5000
+ {%- if controller.identity.get('protocol', 'http') == 'https' %}
+cafile={{ controller.identity.get('cacert_file', controller.cacert_file) }}
+ {%- endif %}
+{%- endif %}
#
# When True, if sending a user token to an REST API, also send a service token.
diff --git a/nova/files/queens/nova-compute.conf.Debian b/nova/files/queens/nova-compute.conf.Debian
index ba5e9ed..a056170 100644
--- a/nova/files/queens/nova-compute.conf.Debian
+++ b/nova/files/queens/nova-compute.conf.Debian
@@ -1108,7 +1108,11 @@
# * Any positive integer in seconds: Enables the option.
# (integer value)
# Minimum value: 0
+{%- if compute.instance_build_timeout is defined %}
+instance_build_timeout = {{ compute.instance_build_timeout }}
+{%- else %}
#instance_build_timeout = 0
+{%- endif %}
#
# Interval to wait before un-rescuing an instance stuck in RESCUE.
@@ -9176,6 +9180,16 @@
# middleware.
# (boolean value)
#send_service_user_token = false
+{%- if compute.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+ {%- if compute.service_user is defined %}
+ {%- set _data=compute.service_user %}
+ {%- else %}
+ {%- set _data=compute.identity %}
+ {%- endif %}
+{%- if not _data.port == '5000' %}{% do _data.update({'port': '5000'}) %}{% endif %}
+{%- include "oslo_templates/files/queens/keystoneauth/_type_" + auth_type + ".conf" %}
+{%- else %}
# PEM encoded Certificate Authority to use when verifying HTTPs
# connections. (string value)
@@ -9259,6 +9273,7 @@
# Tenant Name (string value)
#tenant_name = <None>
+{%- endif %}
[spice]
diff --git a/nova/files/queens/nova-controller.conf.Debian b/nova/files/queens/nova-controller.conf.Debian
index 7218677..e38313e 100644
--- a/nova/files/queens/nova-controller.conf.Debian
+++ b/nova/files/queens/nova-controller.conf.Debian
@@ -1099,7 +1099,11 @@
# * Any positive integer in seconds: Enables the option.
# (integer value)
# Minimum value: 0
+{%- if controller.instance_build_timeout is defined %}
+instance_build_timeout = {{ controller.instance_build_timeout }}
+{%- else %}
#instance_build_timeout = 0
+{%- endif %}
#
# Interval to wait before un-rescuing an instance stuck in RESCUE.
@@ -8897,6 +8901,16 @@
# middleware.
# (boolean value)
#send_service_user_token = false
+{%- if controller.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+ {%- if controller.service_user is defined %}
+ {%- set _data=controller.service_user %}
+ {%- else %}
+ {%- set _data=controller.identity %}
+ {%- endif %}
+{%- if not _data.port == '5000' %}{% do _data.update({'port': '5000'}) %}{% endif %}
+{%- include "oslo_templates/files/queens/keystoneauth/_type_" + auth_type + ".conf" %}
+{%- else %}
# PEM encoded Certificate Authority to use when verifying HTTPs
# connections. (string value)
@@ -8980,6 +8994,7 @@
# Tenant Name (string value)
#tenant_name = <None>
+{%- endif %}
[spice]
diff --git a/nova/files/rocky/nova-compute.conf.Debian b/nova/files/rocky/nova-compute.conf.Debian
index 0063e6f..4621588 100644
--- a/nova/files/rocky/nova-compute.conf.Debian
+++ b/nova/files/rocky/nova-compute.conf.Debian
@@ -8512,6 +8512,16 @@
# middleware.
# (boolean value)
#send_service_user_token = false
+{%- if compute.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+ {%- if compute.service_user is defined %}
+ {%- set _data=compute.service_user %}
+ {%- else %}
+ {%- set _data=compute.identity %}
+ {%- endif %}
+{%- if not _data.port == '5000' %}{% do _data.update({'port': '5000'}) %}{% endif %}
+{%- include "oslo_templates/files/" ~ compute.version ~ "/keystoneauth/_type_" + auth_type + ".conf" %}
+{%- else %}
# PEM encoded Certificate Authority to use when verifying HTTPs connections.
# (string value)
@@ -8600,6 +8610,7 @@
# Tenant Name (string value)
#tenant_name = <None>
+{%- endif %}
[spice]
diff --git a/nova/files/rocky/nova-controller.conf.Debian b/nova/files/rocky/nova-controller.conf.Debian
index 6e04826..bbb411f 100644
--- a/nova/files/rocky/nova-controller.conf.Debian
+++ b/nova/files/rocky/nova-controller.conf.Debian
@@ -8232,6 +8232,16 @@
# middleware.
# (boolean value)
#send_service_user_token = false
+{%- if controller.get('service_user', {}).get('enabled', True) %}
+send_service_user_token = True
+ {%- if controller.service_user is defined %}
+ {%- set _data=controller.service_user %}
+ {%- else %}
+ {%- set _data=controller.identity %}
+ {%- endif %}
+{%- if not _data.port == '5000' %}{% do _data.update({'port': '5000'}) %}{% endif %}
+{%- include "oslo_templates/files/" ~ controller.version ~ "/keystoneauth/_type_" + auth_type + ".conf" %}
+{%- else %}
# PEM encoded Certificate Authority to use when verifying HTTPs connections.
# (string value)
@@ -8320,6 +8330,7 @@
# Tenant Name (string value)
#tenant_name = <None>
+{%- endif %}
[spice]
diff --git a/nova/map.jinja b/nova/map.jinja
index b5a71d6..21fb923 100644
--- a/nova/map.jinja
+++ b/nova/map.jinja
@@ -299,19 +299,5 @@
'warn': '15%',
'crit': '5%',
},
- 'error_log_rate': {
- 'warn': 0.2,
- },
- 'services_failed_warning_threshold_percent': 0.3,
- 'services_failed_critical_threshold_percent': 0.6,
- 'computes_failed_warning_threshold_percent': 0.25,
- 'computes_failed_critical_threshold_percent': 0.5,
- 'cpu_minor_threshold': 0.85,
- 'cpu_major_threshold': 0.95,
- 'ram_major_threshold': 0.85,
- 'ram_critical_threshold': 0.95,
- 'disk_major_threshold': 0.85,
- 'disk_critical_threshold': 0.95,
- 'endpoint_failed_major_threshold': 0.5,
},
}, grain='os_family', merge=salt['pillar.get']('nova:monitoring')) %}
diff --git a/nova/meta/prometheus.yml b/nova/meta/prometheus.yml
index 001a9ea..fbebd2d 100644
--- a/nova/meta/prometheus.yml
+++ b/nova/meta/prometheus.yml
@@ -1,4 +1,4 @@
-{% from "nova/map.jinja" import controller, compute, monitoring with context %}
+{% from "nova/map.jinja" import controller, compute with context %}
{%- set is_controller = controller.get('enabled', False) %}
{%- set is_compute = compute.get('enabled', False) %}
@@ -30,11 +30,6 @@
server:
alert:
{%- if is_controller %}
-{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
-{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
-{%- set minor_compute_threshold = monitoring.computes_failed_warning_threshold_percent|float %}
-{%- set major_compute_threshold = monitoring.computes_failed_critical_threshold_percent|float %}
-{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
{% raw %}
NovaApiOutage:
if: >-
@@ -67,18 +62,17 @@
summary: "nova-api endpoint is not accessible"
description: >-
The nova-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.
-{%- endraw %}
NovaApiEndpointsDownMajor:
if: >-
- count(http_response_status{name=~"nova-api"} == 0) >= count(http_response_status{name=~"nova-api"}) * {{ major_endpoint_threshold }}
+ count(http_response_status{name=~"nova-api"} == 0) >= count(http_response_status{name=~"nova-api"}) * 0.6
for: 2m
labels:
severity: major
service: nova
annotations:
- summary: "{{major_endpoint_threshold * 100}}% of nova-api endpoints are not accessible"
+ summary: "60% of nova-api endpoints are not accessible"
description: >-
- {% raw %}{{ $value }} nova-api endpoints (>= {% endraw %} {{major_endpoint_threshold * 100}}{% raw %}%) are not accessible for 2 minutes.
+ More than 60% of nova-api endpoints are not accessible for 2 minutes.
NovaApiEndpointsOutage:
if: >-
count(http_response_status{name=~"nova-api"} == 0) == count(http_response_status{name=~"nova-api"})
@@ -100,47 +94,46 @@
summary: "{{ $labels.binary }} service is down"
description: >-
The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down.
-{%- endraw %}
NovaServicesDownMinor:
if: >-
- count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{minor_threshold}}
+ count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * 0.3
labels:
severity: minor
service: nova
annotations:
- summary: "{{minor_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
+ summary: "30% of {{ $labels.binary }} services are down"
description: >-
- {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{minor_threshold * 100}}%) are down.
+ More than 30% of {{ $labels.binary }} services are down.
NovaComputeServicesDownMinor:
if: >-
- count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{minor_compute_threshold}}
+ count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * 0.25
labels:
severity: minor
service: nova
annotations:
- summary: "{{minor_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
+ summary: "More than 25% of nova-compute services are down"
description: >-
- {{ $value }} nova-compute services (>= {%- endraw %} {{minor_compute_threshold * 100}}%) are down.
+ More than 25% of nova-compute services are down.
NovaServicesDownMajor:
if: >-
- count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{major_threshold}}
+ count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * 0.6
labels:
severity: major
service: nova
annotations:
- summary: "{{major_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
+ summary: "More than 60% of {{ $labels.binary }} services are down"
description: >-
- {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{major_threshold * 100}}%) are down.
+ More than 60% of {{ $labels.binary }} services are down.
NovaComputeServicesDownMajor:
if: >-
- count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{major_compute_threshold}}
+ count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * 0.5
labels:
severity: major
service: nova
annotations:
- summary: "{{major_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
+ summary: "More than 50% of nova-compute services are down"
description: >-
- {{ $value }} nova-compute services (>= {%- endraw %} {{major_compute_threshold * 100}}{%- raw %}%) are down.
+ More than 50% of nova-compute services are down.
NovaServiceOutage:
if: >-
count(openstack_nova_service_state == 0) by (binary) == on (binary) count(openstack_nova_service_state) by (binary)
@@ -152,168 +145,17 @@
description: >-
All {{ $labels.binary }} services are down.
{%- endraw %}
-{%- set cpu_minor_threshold = monitoring.cpu_minor_threshold|float %}
-{%- set cpu_major_threshold = monitoring.cpu_major_threshold|float %}
-{%- set ram_major_threshold = monitoring.ram_major_threshold|float %}
-{%- set ram_critical_threshold = monitoring.ram_critical_threshold|float %}
-{%- set disk_major_threshold = monitoring.disk_major_threshold|float %}
-{%- set disk_critical_threshold = monitoring.disk_critical_threshold|float %}
- NovaHypervisorVCPUsFullMinor:
- if: >-
- label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_minor_threshold }}
- labels:
- severity: minor
- service: nova
- annotations:
- summary: "{{ cpu_minor_threshold * 100 }}% of hypervisor VCPUs are used"
- description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
- NovaHypervisorVCPUsFullMajor:
- if: >-
- label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ cpu_major_threshold * 100 }}% of hypervisor VCPUs are used"
- description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
- NovaHypervisorMemoryFullMajor:
- if: >-
- openstack_nova_used_ram > openstack_nova_ram * {{ ram_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ ram_major_threshold * 100 }}% of hypervisor RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
- NovaHypervisorMemoryFullCritical:
- if: >-
- openstack_nova_used_ram > openstack_nova_ram * {{ ram_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ ram_critical_threshold * 100 }}% of hypervisor RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
- NovaHypervisorDiskFullMajor:
- if: >-
- openstack_nova_used_disk > openstack_nova_disk * {{ disk_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ disk_major_threshold * 100 }}% of hypervisor disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
- NovaHypervisorDiskFullCritical:
- if: >-
- openstack_nova_used_disk > openstack_nova_disk * {{ disk_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ disk_critical_threshold * 100 }}% of hypervisor disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
- NovaAggregateMemoryFullMajor:
- if: >-
- openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ ram_major_threshold * 100 }}% of aggregate RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
- NovaAggregateMemoryFullCritical:
- if: >-
- openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ ram_critical_threshold * 100 }}% of aggregate RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
- NovaAggregateDiskFullMajor:
- if: >-
- openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ disk_major_threshold * 100 }}% of aggregate disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
- NovaAggregateDiskFullCritical:
- if: >-
- openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ disk_critical_threshold * 100 }}% of aggregate disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
- NovaTotalVCPUsFullMinor:
- if: >-
- sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_minor_threshold }}
- labels:
- severity: minor
- service: nova
- annotations:
- summary: "{{ cpu_minor_threshold * 100 }}% of cloud VCPUs are used"
- description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
- NovaTotalVCPUsFullMajor:
- if: >-
- sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ cpu_major_threshold * 100 }}% of cloud VCPUs are used"
- description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
- NovaTotalMemoryFullMajor:
- if: >-
- openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ ram_major_threshold * 100 }}% of cloud RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
- NovaTotalMemoryFullCritical:
- if: >-
- openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ ram_critical_threshold * 100 }}% of cloud RAM is used"
- description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
- NovaTotalDiskFullMajor:
- if: >-
- openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_major_threshold }}
- labels:
- severity: major
- service: nova
- annotations:
- summary: "{{ disk_major_threshold * 100 }}% of cloud disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
- NovaTotalDiskFullCritical:
- if: >-
- openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_critical_threshold }}
- labels:
- severity: critical
- service: nova
- annotations:
- summary: "{{ disk_critical_threshold * 100 }}% of cloud disk space is used"
- description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
{%- endif %}
- NovaErrorLogsTooHigh:
- {%- set log_threshold = monitoring.error_log_rate.warn|float %}
- if: >-
- sum(rate(log_messages{service="nova",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }}
{%- raw %}
+ NovaErrorLogsTooHigh:
+ if: >-
+ sum(rate(log_messages{service="nova",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > 0.2
labels:
severity: warning
service: nova
annotations:
summary: "High number of errors in Nova logs"
- description: "The average per-second rate of errors in Nova logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes)."
+ description: "The average rate of errors in Nova logs on the {{ $labels.host }} node is more than 0.2 error messages per second (as measured over the last 5 minutes)."
{%- endraw %}
{%- if is_compute and exporters is defined %}
{%- raw %}
diff --git a/tests/pillar/compute_cluster.sls b/tests/pillar/compute_cluster.sls
index 378de4f..53ff9c6 100644
--- a/tests/pillar/compute_cluster.sls
+++ b/tests/pillar/compute_cluster.sls
@@ -14,6 +14,7 @@
vnc_keymap: en-gb
resume_guests_state_on_host_boot: True
preallocate_images: space
+ instance_build_timeout: 600
bind:
vnc_address: 127.0.0.1
vnc_port: 6080
diff --git a/tests/pillar/compute_cluster_vmware.sls b/tests/pillar/compute_cluster_vmware.sls
index 8953178..27de501 100644
--- a/tests/pillar/compute_cluster_vmware.sls
+++ b/tests/pillar/compute_cluster_vmware.sls
@@ -21,6 +21,13 @@
user: nova
password: password
tenant: service
+ service_user:
+ enabled: True
+ user_domain_id: default
+ project_domain_id: default
+ project_name: service
+ username: nova
+ password: pswd
logging:
log_appender: false
log_handlers:
diff --git a/tests/pillar/control_cluster.sls b/tests/pillar/control_cluster.sls
index 397d659..cc88269 100644
--- a/tests/pillar/control_cluster.sls
+++ b/tests/pillar/control_cluster.sls
@@ -12,6 +12,7 @@
ram_allocation_ratio: 1.5
disk_allocation_ratio: 1.0
workers: 8
+ instance_build_timeout: 600
bind:
private_address: 127.0.0.1
public_address: 127.0.0.1
@@ -42,6 +43,14 @@
user: nova
password: password
tenant: service
+ service_user:
+ enabled: True
+ user_domain_id: default
+ project_domain_id: default
+ project_name: service
+ username: nova
+ password: pswd
+
logging:
log_appender: true
log_handlers: