Merge "Un-hardcoding stricthostkeychecking for nova user" into release/2019.2.0
diff --git a/_states/novav21.py b/_states/novav21.py
index b9f9dd0..7b6452b 100644
--- a/_states/novav21.py
+++ b/_states/novav21.py
@@ -361,6 +361,63 @@
     return ret
 
 
+def instances_mapped_to_cell(name, timeout=60, runas='nova'):
+    """Ensure that all instances in the cell are mapped.
+
+    The cell UUID is looked up in the output of
+    ``nova-manage cell_v2 list_cells``; then
+    ``nova-manage cell_v2 map_instances`` is re-run until it exits with 0 or
+    ``timeout`` seconds have elapsed. With ``test=True`` nothing is mapped.
+
+    :param name: cell name; must equal the table's name column exactly.
+    :param timeout: amount of time in seconds mapping process should finish in.
+    :param runas: username to run the shell commands under.
+    :returns: state result dict (``name``, ``changes``, ``result``,
+        ``comment``); ``result`` is False when the cell is not found or the
+        mapping did not succeed in time.
+    """
+    test = __opts__.get('test', False)
+    result = {'name': name, 'changes': {}, 'result': False}
+    cells_table = __salt__['cmd.shell'](
+        'nova-manage cell_v2 list_cells 2>/dev/null', runas=runas)
+    # Parse the table here instead of interpolating ``name`` into an awk
+    # regex: that matched any row merely containing the name (cell1 vs
+    # cell10, connection URLs) and put unquoted input on the shell line.
+    cell_uuid = None
+    for row in cells_table.splitlines():
+        columns = [column.strip() for column in row.split('|')]
+        # Assumes data rows look like "| <name> | <uuid> | ..." (the layout
+        # the previous awk "$4" relied on): index 1 is name, 2 is the UUID.
+        if len(columns) > 2 and columns[1] == name:
+            cell_uuid = columns[2]
+            break
+    if not cell_uuid:
+        result['comment'] = (
+            'Failed to map all instances in cell {0}, it does not exist'
+            .format(name))
+        return result
+    if not test:
+        start_time = time.time()
+        # Retry until the command succeeds; the deadline is only checked
+        # after an attempt, so the command always runs at least once.
+        while True:
+            rc = __salt__['cmd.retcode'](
+                'nova-manage cell_v2 map_instances --cell_uuid %s' % cell_uuid,
+                runas=runas)
+            if rc == 0 or time.time() - start_time > timeout:
+                break
+        if rc != 0:
+            result['comment'] = (
+                'Failed to map all instances in cell {0} in {1} seconds'
+                .format(name, timeout))
+            return result
+    result['comment'] = 'All instances mapped in cell {0}'.format(name)
+    if test:
+        result['comment'] = 'TEST: {}'.format(result['comment'])
+    result['result'] = True
+    return result
+
+
 def _db_version_update(db, version, human_readable_resource_name):
     existing_version = __salt__['cmd.shell'](
         'nova-manage %s version 2>/dev/null' % db)
diff --git a/nova/map.jinja b/nova/map.jinja
index 370f517..d9fea7b 100644
--- a/nova/map.jinja
+++ b/nova/map.jinja
@@ -295,20 +295,6 @@
               'warn': '15%',
               'crit': '5%',
         },
-        'error_log_rate': {
-              'warn': 0.2,
-        },
-        'services_failed_warning_threshold_percent': 0.3,
-        'services_failed_critical_threshold_percent': 0.6,
-        'computes_failed_warning_threshold_percent': 0.25,
-        'computes_failed_critical_threshold_percent': 0.5,
-        'cpu_minor_threshold': 0.85,
-        'cpu_major_threshold': 0.95,
-        'ram_major_threshold': 0.85,
-        'ram_critical_threshold': 0.95,
-        'disk_major_threshold': 0.85,
-        'disk_critical_threshold': 0.95,
-        'endpoint_failed_major_threshold': 0.5,
     },
 }, grain='os_family', merge=salt['pillar.get']('nova:monitoring')) %}
 
diff --git a/nova/meta/prometheus.yml b/nova/meta/prometheus.yml
index 1950e9b..f3e12ee 100644
--- a/nova/meta/prometheus.yml
+++ b/nova/meta/prometheus.yml
@@ -1,4 +1,4 @@
-{% from "nova/map.jinja" import controller, compute, monitoring with context %}
+{% from "nova/map.jinja" import controller, compute with context %}
 
 {%- set is_controller = controller.get('enabled', False) %}
 {%- set is_compute = compute.get('enabled', False) %}
@@ -29,11 +29,6 @@
 server:
   alert:
 {%- if is_controller %}
-{%- set minor_threshold = monitoring.services_failed_warning_threshold_percent|float %}
-{%- set major_threshold = monitoring.services_failed_critical_threshold_percent|float %}
-{%- set minor_compute_threshold = monitoring.computes_failed_warning_threshold_percent|float %}
-{%- set major_compute_threshold = monitoring.computes_failed_critical_threshold_percent|float %}
-{%- set major_endpoint_threshold = monitoring.endpoint_failed_major_threshold|float %}
 {% raw %}
     NovaApiOutage:
       if: >-
@@ -66,18 +61,17 @@
         summary: "nova-api endpoint is not accessible"
         description: >-
           The nova-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.
-{%- endraw %}
     NovaApiEndpointsDownMajor:
       if: >-
-        count(http_response_status{name=~"nova-api"} == 0) >= count(http_response_status{name=~"nova-api"}) * {{ major_endpoint_threshold }}
+        count(http_response_status{name=~"nova-api"} == 0) >= count(http_response_status{name=~"nova-api"}) * 0.6
       for: 2m
       labels:
         severity: major
         service: nova
       annotations:
-        summary: "{{major_endpoint_threshold * 100}}% of nova-api endpoints are not accessible"
+        summary: "60% of nova-api endpoints are not accessible"
         description: >-
-          {% raw %}{{ $value }} nova-api endpoints (>= {% endraw %} {{major_endpoint_threshold * 100}}{% raw %}%) are not accessible for 2 minutes.
+          More than 60% of nova-api endpoints are not accessible for 2 minutes.
     NovaApiEndpointsOutage:
       if: >-
         count(http_response_status{name=~"nova-api"} == 0) == count(http_response_status{name=~"nova-api"})
@@ -99,47 +93,46 @@
         summary: "{{ $labels.binary }} service is down"
         description: >-
           The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down.
-{%- endraw %}
     NovaServicesDownMinor:
       if: >-
-        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{minor_threshold}}
+        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * 0.3
       labels:
         severity: minor
         service: nova
       annotations:
-        summary: "{{minor_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
+        summary: "30% of {{ $labels.binary }} services are down"
         description: >-
-          {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{minor_threshold * 100}}%) are down.
+          More than 30% of {{ $labels.binary }} services are down.
     NovaComputeServicesDownMinor:
       if: >-
-        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{minor_compute_threshold}}
+        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * 0.25
       labels:
         severity: minor
         service: nova
       annotations:
-        summary: "{{minor_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
+        summary: "More than 25% of nova-compute services are down"
         description: >-
-          {{ $value }} nova-compute services (>= {%- endraw %} {{minor_compute_threshold * 100}}%) are down.
+          More than 25% of nova-compute services are down.
     NovaServicesDownMajor:
       if: >-
-        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * {{major_threshold}}
+        count(openstack_nova_service_state{binary!~"nova-compute"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~"nova-compute"}) by (binary) * 0.6
       labels:
         severity: major
         service: nova
       annotations:
-        summary: "{{major_threshold * 100}}%{%- raw %} of {{ $labels.binary }} services are down"
+        summary: "More than 60% of {{ $labels.binary }} services are down"
         description: >-
-          {{ $value }} {{ $labels.binary }} services (>= {%- endraw %} {{major_threshold * 100}}%) are down.
+          More than 60% of {{ $labels.binary }} services are down.
     NovaComputeServicesDownMajor:
       if: >-
-        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * {{major_compute_threshold}}
+        count(openstack_nova_service_state{binary="nova-compute"} == 0) >= count(openstack_nova_service_state{binary="nova-compute"}) * 0.5
       labels:
         severity: major
         service: nova
       annotations:
-        summary: "{{major_compute_threshold * 100}}%{%- raw %} of nova-compute services are down"
+        summary: "More than 50% of nova-compute services are down"
         description: >-
-          {{ $value }} nova-compute services (>= {%- endraw %} {{major_compute_threshold * 100}}{%- raw %}%) are down.
+          More than 50% of nova-compute services are down.
     NovaServiceOutage:
       if: >-
         count(openstack_nova_service_state == 0) by (binary) == on (binary) count(openstack_nova_service_state) by (binary)
@@ -151,168 +144,17 @@
         description: >-
           All {{ $labels.binary }} services are down.
 {%- endraw %}
-{%- set cpu_minor_threshold = monitoring.cpu_minor_threshold|float %}
-{%- set cpu_major_threshold = monitoring.cpu_major_threshold|float %}
-{%- set ram_major_threshold = monitoring.ram_major_threshold|float %}
-{%- set ram_critical_threshold = monitoring.ram_critical_threshold|float %}
-{%- set disk_major_threshold = monitoring.disk_major_threshold|float %}
-{%- set disk_critical_threshold = monitoring.disk_critical_threshold|float %}
-    NovaHypervisorVCPUsFullMinor:
-      if: >-
-        label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_minor_threshold }}
-      labels:
-        severity: minor
-        service: nova
-      annotations:
-        summary: "{{ cpu_minor_threshold * 100 }}% of hypervisor VCPUs are used"
-        description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
-    NovaHypervisorVCPUsFullMajor:
-      if: >-
-        label_replace(system_load15, "hostname", "$1", "host", "(.*)") > on (hostname) openstack_nova_vcpus * {{ cpu_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ cpu_major_threshold * 100 }}% of hypervisor VCPUs are used"
-        description: "{% raw %}{{ $value }} VCPUs on the {{ $labels.hostname }} node (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
-    NovaHypervisorMemoryFullMajor:
-      if: >-
-        openstack_nova_used_ram > openstack_nova_ram * {{ ram_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ ram_major_threshold * 100 }}% of hypervisor RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
-    NovaHypervisorMemoryFullCritical:
-      if: >-
-        openstack_nova_used_ram > openstack_nova_ram * {{ ram_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ ram_critical_threshold * 100 }}% of hypervisor RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.hostname }} node (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
-    NovaHypervisorDiskFullMajor:
-      if: >-
-        openstack_nova_used_disk > openstack_nova_disk * {{ disk_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ disk_major_threshold * 100 }}% of hypervisor disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
-    NovaHypervisorDiskFullCritical:
-      if: >-
-        openstack_nova_used_disk > openstack_nova_disk * {{ disk_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ disk_critical_threshold * 100 }}% of hypervisor disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.hostname }} node (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
-    NovaAggregateMemoryFullMajor:
-      if: >-
-        openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ ram_major_threshold * 100 }}% of aggregate RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
-    NovaAggregateMemoryFullCritical:
-      if: >-
-        openstack_nova_aggregate_used_ram > openstack_nova_aggregate_ram * {{ ram_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ ram_critical_threshold * 100 }}% of aggregate RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
-    NovaAggregateDiskFullMajor:
-      if: >-
-        openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ disk_major_threshold * 100 }}% of aggregate disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
-    NovaAggregateDiskFullCritical:
-      if: >-
-        openstack_nova_aggregate_used_disk > openstack_nova_aggregate_disk * {{ disk_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ disk_critical_threshold * 100 }}% of aggregate disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space on the {{ $labels.aggregate }} aggregate (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
-    NovaTotalVCPUsFullMinor:
-      if: >-
-        sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_minor_threshold }}
-      labels:
-        severity: minor
-        service: nova
-      annotations:
-        summary: "{{ cpu_minor_threshold * 100 }}% of cloud VCPUs are used"
-        description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_minor_threshold * 100 }}%) are used."
-    NovaTotalVCPUsFullMajor:
-      if: >-
-        sum(label_replace(system_load15, "hostname", "$1", "host", "(.*)") and on (hostname) openstack_nova_vcpus) > max(sum(openstack_nova_vcpus) by (instance)) * {{ cpu_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ cpu_major_threshold * 100 }}% of cloud VCPUs are used"
-        description: "{% raw %}{{ $value }} VCPUs in the cloud (> {% endraw %} {{ cpu_major_threshold * 100 }}%) are used."
-    NovaTotalMemoryFullMajor:
-      if: >-
-        openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ ram_major_threshold * 100 }}% of cloud RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_major_threshold * 100 }}%) is used."
-    NovaTotalMemoryFullCritical:
-      if: >-
-        openstack_nova_total_used_ram > openstack_nova_total_ram * {{ ram_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ ram_critical_threshold * 100 }}% of cloud RAM is used"
-        description: "{% raw %}{{ $value }}MB of RAM in the cloud (> {% endraw %} {{ ram_critical_threshold * 100 }}%) is used."
-    NovaTotalDiskFullMajor:
-      if: >-
-        openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_major_threshold }}
-      labels:
-        severity: major
-        service: nova
-      annotations:
-        summary: "{{ disk_major_threshold * 100 }}% of cloud disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_major_threshold * 100 }}%) is used."
-    NovaTotalDiskFullCritical:
-      if: >-
-        openstack_nova_total_used_disk > openstack_nova_total_disk * {{ disk_critical_threshold }}
-      labels:
-        severity: critical
-        service: nova
-      annotations:
-        summary: "{{ disk_critical_threshold * 100 }}% of cloud disk space is used"
-        description: "{% raw %}{{ $value }}GB of disk space in the cloud (> {% endraw %} {{ disk_critical_threshold * 100 }}%) is used."
 {%- endif %}
-    NovaErrorLogsTooHigh:
-      {%- set log_threshold = monitoring.error_log_rate.warn|float %}
-      if: >-
-        sum(rate(log_messages{service="nova",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > {{ log_threshold }}
 {%- raw %}
+    NovaErrorLogsTooHigh:
+      if: >-
+        sum(rate(log_messages{service="nova",level=~"(?i:(error|emergency|fatal))"}[5m])) without (level) > 0.2
       labels:
         severity: warning
         service: nova
       annotations:
         summary: "High number of errors in Nova logs"
-        description: "The average per-second rate of errors in Nova logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes)."
+        description: "The average rate of errors in Nova logs on the {{ $labels.host }} node is more than 0.2 error messages per second (as measured over the last 5 minutes)."
 {%- endraw %}
 {%- if is_compute and exporters is defined and compute.get('compute_driver', 'libvirt.LibvirtDriver') == 'libvirt.LibvirtDriver'%}
 {%- raw %}
diff --git a/nova/upgrade/pre/init.sls b/nova/upgrade/pre/init.sls
index 9ed049a..0c5834e 100644
--- a/nova/upgrade/pre/init.sls
+++ b/nova/upgrade/pre/init.sls
@@ -16,7 +16,7 @@
 /etc/nova/nova.conf:
   file.managed:
   - name: /etc/nova/nova.conf
-  - source: salt://nova/files/{{ _data.version }}/nova-{{ type }}.conf.{{ grains.os_family }}
+  - source: salt://nova/files/{{ upgrade.old_release }}/nova-{{ type }}.conf.{{ grains.os_family }}
   - template: jinja
 
 {%- if controller.get('enabled') %}