Merge "Ensure both severities are used in test data"

commit: 2985e2d88e59920016b9588ba83bf1f8e58e941d [log] [tgz]
author: Zuul <zuul@review.opendev.org> Fri Apr 26 12:48:45 2024 +0000
committer: Gerrit Code Review <review@openstack.org> Fri Apr 26 12:48:45 2024 +0000
tree: af810f2cb60042d93eac4f35f89098d9af5943ed
parent: 671190526d00dba2f132521fe9664cbb1eada07b [diff]
parent: 50c2bfcf553a940d44c6fe0799c8cd3a9412ac79 [diff]
diff --git a/.zuul.yaml b/.zuul.yaml
index 647dd54..b1dc084 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml

@@ -18,7 +18,6 @@
       - openstack/telemetry-tempest-plugin
       - openstack/heat-tempest-plugin
       - openstack/heat
-      - openstack/dib-utils
       - openstack/diskimage-builder
       - openstack/tempest
       - gnocchixyz/gnocchi
@@ -31,6 +30,9 @@
         aodh: https://opendev.org/openstack/aodh
         panko: https://opendev.org/openstack/panko
         sg-core: https://github.com/infrawatch/sg-core
+      # NOTE(jokke): The following disables the gabbi-based integration tests for now.
+      # We will need to figure out how to refactor them to be stable in the CI.
+      tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration\.)
       devstack_services:
         tempest: true
       devstack_localrc:
@@ -45,6 +47,7 @@
         # be "gnocchi,sg-core"
         CEILOMETER_BACKEND: "gnocchi"
         CEILOMETER_BACKENDS: "gnocchi,sg-core"
+        PROMETHEUS_SERVICE_SCRAPE_TARGETS: "sg-core"
         CEILOMETER_PIPELINE_INTERVAL: 15
         CEILOMETER_ALARM_THRESHOLD: 6000000000
         GLOBAL_VENV: False
@@ -57,6 +60,7 @@
               metric_backends: gnocchi,prometheus
             telemetry:
               disable_ssl_certificate_validation: True
+              ceilometer_polling_interval: 15
       tempest_test_regex: telemetry_tempest_plugin
       tox_envlist: all
 
@@ -122,7 +126,6 @@
     parent: telemetry-dsvm-integration
     description: |
       Telemetry devstack tempest tests job for a Centos 9 stream system
-    voting: false
 
 - job:
     name: telemetry-dsvm-integration-centos-9s-fips
@@ -132,7 +135,6 @@
     pre-run: playbooks/enable-fips.yaml
     vars:
       nslookup_target: 'opendev.org'
-    voting: false
 
 - project:
     queue: telemetry

diff --git a/telemetry_tempest_plugin/config.py b/telemetry_tempest_plugin/config.py
index c8f757b..8ff90aa 100644
--- a/telemetry_tempest_plugin/config.py
+++ b/telemetry_tempest_plugin/config.py

@@ -76,6 +76,9 @@
     cfg.IntOpt('alarm_threshold',
                default=10,
                help="Threshold to cross for the alarm to trigger."),
+    cfg.IntOpt('scaledown_alarm_threshold',
+               default=2000000000,
+               help="Threshold to cross for the alarm to trigger."),
     cfg.BoolOpt("disable_ssl_certificate_validation",
                 default=False,
                 help="Disable SSL certificate validation when running "
@@ -83,7 +86,13 @@
     cfg.StrOpt('sg_core_service_url',
                default="127.0.0.1:3000",
                help="URL to sg-core prometheus endpoint"),
-
+    cfg.StrOpt('prometheus_service_url',
+               default="127.0.0.1:9090",
+               help="URL to prometheus endpoint"),
+    cfg.IntOpt('ceilometer_polling_interval',
+               default=300,
+               help="Polling interval configured for ceilometer. This can "
+                    "be used in test cases to wait for metrics to appear.")
 ]
 
 telemetry_services_opts = [

diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
new file mode 100644
index 0000000..abe0293
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml

@@ -0,0 +1,162 @@
+defaults:
+    request_headers:
+        x-auth-token: $ENVIRON['USER_TOKEN']
+
+tests:
+    - name: list alarms none
+      desc: Lists alarms, none yet exist
+      verbose: all
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+      method: GET
+      response_strings:
+          - "[]"
+
+    - name: list servers none
+      desc: List servers, none yet exists
+      verbose: all
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers
+      method: GET
+      response_strings:
+          - "[]"
+
+    - name: create stack
+      desc: Create an autoscaling stack
+      verbose: all
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks
+      method: POST
+      request_headers:
+          content-type: application/json
+      data: <@create_stack.json
+      status: 201
+
+    - name: control stack status
+      desc: Check that the stack has been created successfully
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      redirects: true
+      verbose: all
+      method: GET
+      status: 200
+      poll:
+          count: 300
+          delay: 1
+      response_json_paths:
+          $.stack.stack_status: "CREATE_COMPLETE"
+
+    - name: list servers grow
+      verbose: all
+      desc: Wait for the autoscaling stack to grow to two servers
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+      method: GET
+      poll:
+          count: 600
+          delay: 1
+      response_json_paths:
+          $.servers[0].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+          $.servers[1].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+          $.servers[0].status: ACTIVE
+          $.servers[1].status: ACTIVE
+          $.servers.`len`: 2
+
+    - name: check prometheus query for the servers count .
+      desc: Check the Prometheus metric for the existence of servers
+      url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
+      verbose: all
+      method: POST
+      request_headers:
+          content-type: application/x-www-form-urlencoded
+      data:
+         query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
+      poll:
+          count: 300
+          delay: 1
+      status: 200
+      response_json_paths:
+          $.data.result.`len`: 2
+
+    - name: check alarm cpu_alarm_high ALARM
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_high"
+      response_json_paths:
+          $[0].state: alarm
+
+    - name: check alarm cpu_alarm_high is OK
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 900
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_high-"
+      response_json_paths:
+          $[0].state: ok
+
+    - name: check alarm cpu_alarm_low is ALARM
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_low-"
+      response_json_paths:
+          $[1].state: alarm
+
+    - name: list servers shrink
+      verbose: all
+      desc: Wait for the autoscaling stack to delete one server
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+      method: GET
+      poll:
+          count: 600
+          delay: 1
+      response_json_paths:
+          $.servers[0].metadata.'metering.server_group': $HISTORY['control stack status'].$RESPONSE['$.stack.id']
+          $.servers[0].status: ACTIVE
+          $.servers.`len`: 1
+
+    - name: get stack location
+      desc: Get the stack location
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      method: GET
+      status: 302
+
+    - name: delete stack
+      desc: Delete the stack
+      url: $LOCATION
+      method: DELETE
+      status: 204
+
+    - name: confirm that stack have been deleted
+      desc: Check that the stack has been deleted before proceeding
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      redirects: true
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      status: 404
+
+    - name: list alarms deleted
+      desc: List alarms, no more exist
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+      method: GET
+      response_strings:
+          - "[]"
+
+    - name: list servers deleted
+      desc: List servers, no more exists
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers
+      method: GET
+      response_strings:
+          - "[]"

diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml
index f4cd0b1..5568878 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml

@@ -3,7 +3,7 @@
     desc: Check the sg-core prometheus endpoint for ceilometer metrics
     GET: $ENVIRON['SG_CORE_SERVICE_URL']/metrics
     poll:
-      count: 60
+      count: $ENVIRON['CEILOMETER_POLLING_INTERVAL']
       delay: 2
     response_strings:
       - "ceilometer_image_size"

diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
new file mode 100644
index 0000000..4f6962b
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json

@@ -0,0 +1,90 @@
+{
+    "stack_name": "$ENVIRON['STACK_NAME']",
+    "template": {
+        "heat_template_version": "2013-05-23",
+        "description": "Integration Test AutoScaling with heat+ceilometer+prometheus+aodh",
+        "resources": {
+            "asg": {
+                "type": "OS::Heat::AutoScalingGroup",
+                "properties": {
+                    "min_size": 1,
+                    "max_size": 2,
+                    "resource": {
+                        "type": "OS::Nova::Server",
+                        "properties": {
+                            "networks": [{ "network": "$ENVIRON['NEUTRON_NETWORK']" }],
+                            "flavor": "$ENVIRON['NOVA_FLAVOR_REF']",
+                            "image": "$ENVIRON['GLANCE_IMAGE_NAME']",
+                            "metadata": {
+                                "metering.server_group": { "get_param": "OS::stack_id" }
+                            },
+                            "user_data_format": "RAW",
+                            "user_data": {"Fn::Join": ["", [
+                                "#!/bin/sh\n",
+                                "echo 'Loading CPU'\n",
+                                "set -v\n",
+                                "cat /dev/urandom > /dev/null & sleep 120 ; kill $! \n"
+                            ]]}
+                        }
+                    }
+                }
+            },
+            "web_server_scaleup_policy": {
+                "type": "OS::Heat::ScalingPolicy",
+                "properties": {
+                    "adjustment_type": "change_in_capacity",
+                    "auto_scaling_group_id": { "get_resource": "asg" },
+                    "cooldown": 60,
+                    "scaling_adjustment": 1
+                }
+            },
+            "cpu_alarm_high": {
+                "type": "OS::Aodh::PrometheusAlarm",
+                "properties": {
+                    "description": "Scale-up if the mean CPU is higher than the threshold",
+                    "threshold": $ENVIRON["AODH_THRESHOLD"],
+                    "comparison_operator": "gt",
+                    "alarm_actions": [
+                        {
+                            "str_replace": {
+                                "template": "trust+url",
+                                "params": {
+                                    "url": { "get_attr": [ "web_server_scaleup_policy", "signal_url" ] }
+                                }
+                            }
+                        }
+                    ],
+                    "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
+                }
+            },
+            "web_server_scaledown_policy": {
+                "type": "OS::Heat::ScalingPolicy",
+                "properties": {
+                    "adjustment_type": "change_in_capacity",
+                    "auto_scaling_group_id": { "get_resource": "asg" },
+                    "cooldown": 60,
+                    "scaling_adjustment": -1
+                }
+            },
+            "cpu_alarm_low": {
+                "type": "OS::Aodh::PrometheusAlarm",
+                "properties": {
+                    "description": "Scale-down if the mean CPU is lower than the threshold",
+                    "threshold": $ENVIRON["SCALEDOWN_THRESHOLD"],
+                    "comparison_operator": "lt",
+                    "alarm_actions": [
+                        {
+                            "str_replace": {
+                                "template": "trust+url",
+                                "params": {
+                                    "url": { "get_attr": [ "web_server_scaledown_policy", "signal_url" ] }
+                                }
+                            }
+                        }
+                    ],
+                    "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
+                }
+            }
+        }
+    }
+}

diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
index c379470..93d05de 100644
--- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
+++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py

@@ -13,31 +13,96 @@
 import os
 
 from tempest import config
-import tempest.test
+from tempest.lib.common.utils import data_utils
+from tempest.scenario import manager
 
 from telemetry_tempest_plugin.scenario import utils
 
-CONF = config.CONF
 
 TEST_DIR = os.path.join(os.path.dirname(__file__),
                         'telemetry_integration_prometheus_gabbits')
 
 
-class PrometheusGabbiTest(tempest.test.BaseTestCase):
-    credentials = ['admin']
+class PrometheusGabbiTest(manager.ScenarioTest):
+    credentials = ['admin', 'primary']
 
     TIMEOUT_SCALING_FACTOR = 5
 
     @classmethod
     def skip_checks(cls):
         super(PrometheusGabbiTest, cls).skip_checks()
-        if not CONF.service_available.sg_core:
-            raise cls.skipException("sg-core support is required")
+        for name in ["aodh", "nova", "heat",
+                     "ceilometer", "glance", "sg_core"]:
+            cls._check_service(name)
+
+    @classmethod
+    def _check_service(cls, name):
+        if not getattr(config.CONF.service_available, name, False):
+            raise cls.skipException("%s support is required" %
+                                    name.capitalize())
+
+    @staticmethod
+    def _get_endpoint(auth, service):
+        opt_section = getattr(config.CONF, service)
+        endpoint_type = opt_section.endpoint_type
+        is_keystone_v3 = 'catalog' in auth[1]
+
+        if is_keystone_v3:
+            if endpoint_type.endswith("URL"):
+                endpoint_type = endpoint_type[:-3]
+            catalog = auth[1]['catalog']
+            endpoints = [e['endpoints'] for e in catalog
+                         if e['type'] == opt_section.catalog_type]
+            if not endpoints:
+                raise Exception("%s endpoint not found" %
+                                opt_section.catalog_type)
+            endpoints = [e['url'] for e in endpoints[0]
+                         if e['interface'] == endpoint_type]
+            if not endpoints:
+                raise Exception("%s interface not found for endpoint %s" %
+                                (endpoint_type,
+                                 opt_section.catalog_type))
+            return endpoints[0].rstrip('/')
+
+        else:
+            if not endpoint_type.endswith("URL"):
+                endpoint_type += "URL"
+            catalog = auth[1]['serviceCatalog']
+            endpoints = [e for e in catalog
+                         if e['type'] == opt_section.catalog_type]
+            if not endpoints:
+                raise Exception("%s endpoint not found" %
+                                opt_section.catalog_type)
+            return endpoints[0]['endpoints'][0][endpoint_type].rstrip('/')
 
     def _prep_test(self, filename):
+        auth = self.os_primary.auth_provider.get_auth()
+        networks = self.os_primary.networks_client.list_networks(
+            **{'router:external': False, 'fields': 'id'})['networks']
+        stack_name = data_utils.rand_name('telemetry')
+        # NOTE(marihan): This is used in the prometheus query, as heat uses
+        # the last 7 characters of stack_name when creating the autoscaling
+        # resources.
+        resource_prefix = stack_name[-7:]
         os.environ.update({
+            "USER_TOKEN": auth[0],
+            "AODH_THRESHOLD": str(config.CONF.telemetry.alarm_threshold),
+            "SCALEDOWN_THRESHOLD":
+            str(config.CONF.telemetry.scaledown_alarm_threshold),
+            "AODH_SERVICE_URL": self._get_endpoint(auth, "alarming_plugin"),
+            "HEAT_SERVICE_URL": self._get_endpoint(auth, "heat_plugin"),
+            "NOVA_SERVICE_URL": self._get_endpoint(auth, "compute"),
             "SG_CORE_SERVICE_URL":
-            str(config.CONF.telemetry.sg_core_service_url),
+            config.CONF.telemetry.sg_core_service_url,
+            "CEILOMETER_POLLING_INTERVAL":
+            str(config.CONF.telemetry.ceilometer_polling_interval),
+            "PROMETHEUS_SERVICE_URL":
+            config.CONF.telemetry.prometheus_service_url,
+            "GLANCE_IMAGE_NAME": self.image_create(),
+            "NOVA_FLAVOR_REF": config.CONF.compute.flavor_ref,
+            "NEUTRON_NETWORK": networks[0].get('id'),
+            "STACK_NAME": stack_name,
+            "RESOURCE_PREFIX": resource_prefix,
         })
commit	2985e2d88e59920016b9588ba83bf1f8e58e941d	[log] [tgz]
author	Zuul <zuul@review.opendev.org>	Fri Apr 26 12:48:45 2024 +0000
committer	Gerrit Code Review <review@openstack.org>	Fri Apr 26 12:48:45 2024 +0000
tree	af810f2cb60042d93eac4f35f89098d9af5943ed
parent	671190526d00dba2f132521fe9664cbb1eada07b [diff]
parent	50c2bfcf553a940d44c6fe0799c8cd3a9412ac79 [diff]