Merge "Add basic admin tests for Aodh"
diff --git a/.zuul.yaml b/.zuul.yaml
index cf4c861..daa30ec 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -11,14 +11,11 @@
       - ^releasenotes/.*$
     timeout: 7800
     required-projects: &base_required_projects
-      - openstack/devstack-gate
-      - openstack/panko
       - openstack/aodh
       - openstack/ceilometer
       - openstack/telemetry-tempest-plugin
       - openstack/heat-tempest-plugin
       - openstack/heat
-      - openstack/dib-utils
       - openstack/diskimage-builder
       - openstack/tempest
       - gnocchixyz/gnocchi
@@ -29,8 +26,10 @@
         heat: https://opendev.org/openstack/heat
         ceilometer: https://opendev.org/openstack/ceilometer
         aodh: https://opendev.org/openstack/aodh
-        panko: https://opendev.org/openstack/panko
         sg-core: https://github.com/infrawatch/sg-core
+      # NOTE(jokke): The following disables the gabbi-based integration tests for now.
+      # We will need to figure out how to refactor them to be stable in the CI.
+      tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration\.)
       devstack_services:
         tempest: true
       devstack_localrc:
@@ -45,6 +44,7 @@
         # be "gnocchi,sg-core"
         CEILOMETER_BACKEND: "gnocchi"
         CEILOMETER_BACKENDS: "gnocchi,sg-core"
+        PROMETHEUS_SERVICE_SCRAPE_TARGETS: "sg-core"
         CEILOMETER_PIPELINE_INTERVAL: 15
         CEILOMETER_ALARM_THRESHOLD: 6000000000
         GLOBAL_VENV: False
@@ -52,11 +52,12 @@
         test-config:
           $TEMPEST_CONFIG:
             service_available:
-              sg-core: True
+              sg_core: True
             telemetry_services:
               metric_backends: gnocchi,prometheus
             telemetry:
               disable_ssl_certificate_validation: True
+              ceilometer_polling_interval: 15
       tempest_test_regex: telemetry_tempest_plugin
       tox_envlist: all
 
@@ -130,7 +131,7 @@
       Telemetry devstack tempest tests job for a FIPS enabled Centos 9 stream system
     pre-run: playbooks/enable-fips.yaml
     vars:
-      nslookup_target: 'opendev.org'
+      nslookup_target: "opendev.org"
 
 - project:
     queue: telemetry
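
The tempest_exclude_regex added above disables only the gabbi-based
integration tests; the new prometheus scenario tests still match
tempest_test_regex. A quick sanity check of the regex (a sketch, with
illustrative test IDs and plain Python re semantics):

    import re

    exclude = re.compile(
        r"(^telemetry_tempest_plugin\.scenario\.test_telemetry_integration\.)")
    # Matches the gabbi-based integration tests...
    assert exclude.match("telemetry_tempest_plugin.scenario"
                         ".test_telemetry_integration"
                         ".TestTelemetryIntegration.test_autoscaling")
    # ...but not the prometheus scenario tests, whose module name continues
    # with "_prometheus" rather than a dot.
    assert not exclude.match("telemetry_tempest_plugin.scenario"
                             ".test_telemetry_integration_prometheus"
                             ".PrometheusGabbiTest.test_autoscaling")
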
diff --git a/requirements.txt b/requirements.txt
index 4e6bb28..17f694d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,5 @@
-# The order of packages is significant, because pip processes them in the order
-# of appearance. Changing the order has an impact on the overall integration
-# process, which may cause wedges in the gate later.
-
 pbr>=2.0 # Apache-2.0
 oslo.config>=6.0.0 # Apache-2.0
 oslo.utils>=3.37.0 # Apache-2.0
 tempest>=17.1.0 # Apache-2.0
-gabbi>=1.30.0 # Apache-2.0
+gabbi>=2.7.0 # Apache-2.0
diff --git a/telemetry_tempest_plugin/aodh/api/sql/test_alarming_api.py b/telemetry_tempest_plugin/aodh/api/sql/test_alarming_api.py
index 73ab03d..b5c459e 100644
--- a/telemetry_tempest_plugin/aodh/api/sql/test_alarming_api.py
+++ b/telemetry_tempest_plugin/aodh/api/sql/test_alarming_api.py
@@ -62,7 +62,7 @@
                 body = self.alarming_client.create_alarm(
                     name=alarm_name,
                     type='event',
-                    severity=random.choice(sevs),
+                    severity=sevs[j % 2],
                     event_rule=self.rule)
                 alarms[alarm_name].append(body['alarm_id'])
         ordered_alarms = []
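
Replacing random.choice(sevs) with sevs[j % 2] makes the severity assignment
deterministic, presumably so the ordering assertions that follow cannot
flake. A sketch of the alternation (assuming sevs is a two-element list; the
actual values are defined earlier in the test):

    sevs = ['critical', 'low']          # assumed two-element severity list
    picked = [sevs[j % 2] for j in range(4)]
    assert picked == ['critical', 'low', 'critical', 'low']
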
diff --git a/telemetry_tempest_plugin/config.py b/telemetry_tempest_plugin/config.py
index ad4ea85..a478158 100644
--- a/telemetry_tempest_plugin/config.py
+++ b/telemetry_tempest_plugin/config.py
@@ -76,6 +76,9 @@
     cfg.IntOpt('alarm_threshold',
                default=10,
                help="Threshold to cross for the alarm to trigger."),
+    cfg.IntOpt('scaledown_alarm_threshold',
+               default=2000000000,
+               help="Threshold to cross for the alarm to trigger."),
     cfg.BoolOpt("disable_ssl_certificate_validation",
                 default=False,
                 help="Disable SSL certificate validation when running "
@@ -83,7 +86,17 @@
     cfg.StrOpt('sg_core_service_url',
                default="127.0.0.1:3000",
                help="URL to sg-core prometheus endpoint"),
-
+    cfg.StrOpt('prometheus_service_url',
+               default="127.0.0.1:9090",
+               help="URL to prometheus endpoint"),
+    cfg.IntOpt('ceilometer_polling_interval',
+               default=300,
+               help="Polling interval configured for ceilometer. This can "
+                    "be used in test cases to wait for metrics to appear."),
+    cfg.IntOpt('prometheus_scrape_interval',
+               default=15,
+               help="Scrape interval configured for prometheus. This can "
+                    "be used in test cases to properly configure autoscaling")
 ]
 
 telemetry_services_opts = [
@@ -93,7 +106,7 @@
                 help="Backend store used to store metrics"),
     cfg.StrOpt('alarm_backend',
                default='mysql',
-               choices=['mysql', 'postgresq'],
+               choices=['mysql', 'postgresql'],
                help="Database used by the aodh service"),
 ]
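
For reference, a sketch of how the new [telemetry] options are consumed; this
mirrors the scenario-test change further below:

    from tempest import config

    CONF = config.CONF
    # Window a sample needs to travel ceilometer polling -> sg-core ->
    # prometheus scrape before it becomes queryable; exported to the gabbi
    # tests below as PROMETHEUS_RATE_DURATION.
    rate_duration = (CONF.telemetry.ceilometer_polling_interval
                     + CONF.telemetry.prometheus_scrape_interval)
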
 
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_gabbits/autoscaling.yaml
index 6b87b2b..58821af 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_gabbits/autoscaling.yaml
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_gabbits/autoscaling.yaml
@@ -152,6 +152,9 @@
       desc: List alarms, no more exist
       url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
       method: GET
+      poll:
+          count: 30
+          delay: 2
       response_strings:
           - "[]"
 
@@ -159,5 +162,8 @@
       desc: List servers, no more exists
       url: $ENVIRON['NOVA_SERVICE_URL']/servers
       method: GET
+      poll:
+          count: 30
+          delay: 2
       response_strings:
           - "[]"
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
new file mode 100644
index 0000000..b66ae40
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
@@ -0,0 +1,168 @@
+defaults:
+    request_headers:
+        x-auth-token: $ENVIRON['USER_TOKEN']
+
+tests:
+    - name: list alarms none
+      desc: Lists alarms, none yet exist
+      verbose: all
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+      method: GET
+      response_strings:
+          - "[]"
+
+    - name: list servers none
+      desc: List servers, none yet exist
+      verbose: all
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers
+      method: GET
+      response_strings:
+          - "[]"
+
+    - name: create stack
+      desc: Create an autoscaling stack
+      verbose: all
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks
+      method: POST
+      request_headers:
+          content-type: application/json
+      data: <@create_stack.json
+      status: 201
+
+    - name: control stack status
+      desc: Check the stack has been created successfully
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      redirects: true
+      verbose: all
+      method: GET
+      status: 200
+      poll:
+          count: 300
+          delay: 1
+      response_json_paths:
+          $.stack.stack_status: "CREATE_COMPLETE"
+
+    - name: list servers grow
+      verbose: all
+      desc: Wait for the autoscaling stack to grow to two servers
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+      method: GET
+      poll:
+          count: 600
+          delay: 1
+      response_json_paths:
+          $.servers[0].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+          $.servers[1].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+          $.servers[0].status: ACTIVE
+          $.servers[1].status: ACTIVE
+          $.servers.`len`: 2
+
+    - name: check prometheus query for the server count
+      desc: Check the Prometheus metric for the existence of servers
+      url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
+      verbose: all
+      method: POST
+      request_headers:
+          content-type: application/x-www-form-urlencoded
+      data:
+         query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
+      poll:
+          count: 300
+          delay: 1
+      status: 200
+      response_json_paths:
+          $.data.result.`len`: 2
+
+    - name: check alarm cpu_alarm_high ALARM
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_high"
+      response_json_paths:
+          $[0].state: alarm
+
+    - name: check alarm cpu_alarm_high is OK
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 900
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_high-"
+      response_json_paths:
+          $[0].state: ok
+
+    - name: check alarm cpu_alarm_low is ALARM
+      verbose: all
+      desc: Check the aodh alarm and its state
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      response_strings:
+          - "$ENVIRON['STACK_NAME']-cpu_alarm_low-"
+      response_json_paths:
+          $[1].state: alarm
+
+    - name: list servers shrink
+      verbose: all
+      desc: Wait for the autoscaling stack to delete one server
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+      method: GET
+      poll:
+          count: 600
+          delay: 1
+      response_json_paths:
+          $.servers[0].metadata.'metering.server_group': $HISTORY['control stack status'].$RESPONSE['$.stack.id']
+          $.servers[0].status: ACTIVE
+          $.servers.`len`: 1
+
+    - name: get stack location
+      desc: Get the stack location
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      method: GET
+      status: 302
+
+    - name: delete stack
+      desc: Delete the stack
+      url: $LOCATION
+      method: DELETE
+      status: 204
+
+    - name: confirm that stack has been deleted
+      desc: Check the stack has been deleted before proceeding
+      url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+      redirects: true
+      method: GET
+      poll:
+          count: 600
+          delay: 5
+      status: 404
+
+    - name: list alarms deleted
+      desc: List alarms, no more exist
+      url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+      method: GET
+      poll:
+          count: 30
+          delay: 2
+      response_strings:
+          - "[]"
+
+    - name: list servers deleted
+      desc: List servers, no more exist
+      url: $ENVIRON['NOVA_SERVICE_URL']/servers
+      method: GET
+      poll:
+          count: 30
+          delay: 2
+      response_strings:
+          - "[]"
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml
index f4cd0b1..5568878 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/ceilometer-sg-core-integration.yaml
@@ -3,7 +3,7 @@
     desc: Check the sg-core prometheus endpoint for ceilometer metrics
     GET: $ENVIRON['SG_CORE_SERVICE_URL']/metrics
     poll:
-      count: 60
+      count: $ENVIRON['CEILOMETER_POLLING_INTERVAL']
       delay: 2
     response_strings:
       - "ceilometer_image_size"
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
new file mode 100644
index 0000000..47d15f1
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
@@ -0,0 +1,90 @@
+{
+    "stack_name": "$ENVIRON['STACK_NAME']",
+    "template": {
+        "heat_template_version": "2013-05-23",
+        "description": "Integration Test AutoScaling with heat+ceilometer+prometheus+aodh",
+        "resources": {
+            "asg": {
+                "type": "OS::Heat::AutoScalingGroup",
+                "properties": {
+                    "min_size": 1,
+                    "max_size": 2,
+                    "resource": {
+                        "type": "OS::Nova::Server",
+                        "properties": {
+                            "networks": [{ "network": "$ENVIRON['NEUTRON_NETWORK']" }],
+                            "flavor": "$ENVIRON['NOVA_FLAVOR_REF']",
+                            "image": "$ENVIRON['GLANCE_IMAGE_NAME']",
+                            "metadata": {
+                                "metering.server_group": { "get_param": "OS::stack_id" }
+                            },
+                            "user_data_format": "RAW",
+                            "user_data": {"Fn::Join": ["", [
+                                "#!/bin/sh\n",
+                                "echo 'Loading CPU'\n",
+                                "set -v\n",
+                                "cat /dev/urandom > /dev/null & sleep 120 ; kill $! \n"
+                            ]]}
+                        }
+                    }
+                }
+            },
+            "web_server_scaleup_policy": {
+                "type": "OS::Heat::ScalingPolicy",
+                "properties": {
+                    "adjustment_type": "change_in_capacity",
+                    "auto_scaling_group_id": { "get_resource": "asg" },
+                    "cooldown": 60,
+                    "scaling_adjustment": 1
+                }
+            },
+            "cpu_alarm_high": {
+                "type": "OS::Aodh::PrometheusAlarm",
+                "properties": {
+                    "description": "Scale-up if the mean CPU is higher than the threshold",
+                    "threshold": $ENVIRON["AODH_THRESHOLD"],
+                    "comparison_operator": "gt",
+                    "alarm_actions": [
+                        {
+                            "str_replace": {
+                                "template": "trust+url",
+                                "params": {
+                                    "url": { "get_attr": [ "web_server_scaleup_policy", "signal_url" ] }
+                                }
+                            }
+                        }
+                    ],
+                    "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+                }
+            },
+            "web_server_scaledown_policy": {
+                "type": "OS::Heat::ScalingPolicy",
+                "properties": {
+                    "adjustment_type": "change_in_capacity",
+                    "auto_scaling_group_id": { "get_resource": "asg" },
+                    "cooldown": 60,
+                    "scaling_adjustment": -1
+                }
+            },
+            "cpu_alarm_low": {
+                "type": "OS::Aodh::PrometheusAlarm",
+                "properties": {
+                    "description": "Scale-down if the mean CPU is lower than the threshold",
+                    "threshold": $ENVIRON["SCALEDOWN_THRESHOLD"],
+                    "comparison_operator": "lt",
+                    "alarm_actions": [
+                        {
+                            "str_replace": {
+                                "template": "trust+url",
+                                "params": {
+                                    "url": { "get_attr": [ "web_server_scaledown_policy", "signal_url" ] }
+                                }
+                            }
+                        }
+                    ],
+                    "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+                }
+            }
+        }
+    }
+}
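
The PromQL query in both alarms is assembled via gabbi's $ENVIRON
substitution; a sketch of the result, using example values matching what
_prep_test() below exports (the prefix is random per run):

    resource_prefix = "1638462"   # stack_name[-7:], hypothetical value
    rate_duration = 15 + 15       # polling interval + scrape interval
    query = ("(rate(ceilometer_cpu{resource_name=~'te-%s.*'}[%ds])) * 100"
             % (resource_prefix, rate_duration))
    # -> (rate(ceilometer_cpu{resource_name=~'te-1638462.*'}[30s])) * 100
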
diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
index c379470..0d6637b 100644
--- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
+++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
@@ -13,31 +13,100 @@
 import os
 
 from tempest import config
-import tempest.test
+from tempest.lib.common.utils import data_utils
+from tempest.scenario import manager
 
 from telemetry_tempest_plugin.scenario import utils
 
-CONF = config.CONF
 
 TEST_DIR = os.path.join(os.path.dirname(__file__),
                         'telemetry_integration_prometheus_gabbits')
 
 
-class PrometheusGabbiTest(tempest.test.BaseTestCase):
-    credentials = ['admin']
+class PrometheusGabbiTest(manager.ScenarioTest):
+    credentials = ['admin', 'primary']
 
     TIMEOUT_SCALING_FACTOR = 5
 
     @classmethod
     def skip_checks(cls):
         super(PrometheusGabbiTest, cls).skip_checks()
-        if not CONF.service_available.sg_core:
-            raise cls.skipException("sg-core support is required")
+        for name in ["aodh", "nova", "heat",
+                     "ceilometer", "glance", "sg_core"]:
+            cls._check_service(name)
+
+    @classmethod
+    def _check_service(cls, name):
+        if not getattr(config.CONF.service_available, name, False):
+            raise cls.skipException("%s support is required" %
+                                    name.capitalize())
+
+    @staticmethod
+    def _get_endpoint(auth, service):
+        opt_section = getattr(config.CONF, service)
+        endpoint_type = opt_section.endpoint_type
+        is_keystone_v3 = 'catalog' in auth[1]
+
+        if is_keystone_v3:
+            if endpoint_type.endswith("URL"):
+                endpoint_type = endpoint_type[:-3]
+            catalog = auth[1]['catalog']
+            endpoints = [e['endpoints'] for e in catalog
+                         if e['type'] == opt_section.catalog_type]
+            if not endpoints:
+                raise Exception("%s endpoint not found" %
+                                opt_section.catalog_type)
+            endpoints = [e['url'] for e in endpoints[0]
+                         if e['interface'] == endpoint_type]
+            if not endpoints:
+                raise Exception("%s interface not found for endpoint %s" %
+                                (endpoint_type,
+                                 opt_section.catalog_type))
+            return endpoints[0].rstrip('/')
+
+        else:
+            if not endpoint_type.endswith("URL"):
+                endpoint_type += "URL"
+            catalog = auth[1]['serviceCatalog']
+            endpoints = [e for e in catalog
+                         if e['type'] == opt_section.catalog_type]
+            if not endpoints:
+                raise Exception("%s endpoint not found" %
+                                opt_section.catalog_type)
+            return endpoints[0]['endpoints'][0][endpoint_type].rstrip('/')
 
     def _prep_test(self, filename):
+        auth = self.os_primary.auth_provider.get_auth()
+        networks = self.os_primary.networks_client.list_networks(
+            **{'router:external': False, 'fields': 'id'})['networks']
+        stack_name = data_utils.rand_name('telemetry')
+        # NOTE(marihan): This is used in the prometheus query, as heat uses
+        # the last 7 digits of stack_name to create the autoscaling
+        # resources.
+        resource_prefix = stack_name[-7:]
+        prometheus_rate_duration = (
+            config.CONF.telemetry.ceilometer_polling_interval
+            + config.CONF.telemetry.prometheus_scrape_interval)
         os.environ.update({
+            "USER_TOKEN": auth[0],
+            "AODH_THRESHOLD": str(config.CONF.telemetry.alarm_threshold),
+            "SCALEDOWN_THRESHOLD":
+            str(config.CONF.telemetry.scaledown_alarm_threshold),
+            "AODH_SERVICE_URL": self._get_endpoint(auth, "alarming_plugin"),
+            "HEAT_SERVICE_URL": self._get_endpoint(auth, "heat_plugin"),
+            "NOVA_SERVICE_URL": self._get_endpoint(auth, "compute"),
             "SG_CORE_SERVICE_URL":
-            str(config.CONF.telemetry.sg_core_service_url),
+            config.CONF.telemetry.sg_core_service_url,
+            "CEILOMETER_POLLING_INTERVAL":
+            str(config.CONF.telemetry.ceilometer_polling_interval),
+            "PROMETHEUS_SERVICE_URL":
+            config.CONF.telemetry.prometheus_service_url,
+            "GLANCE_IMAGE_NAME": self.image_create(),
+            "NOVA_FLAVOR_REF": config.CONF.compute.flavor_ref,
+            "NEUTRON_NETWORK": networks[0].get('id'),
+            "STACK_NAME": stack_name,
+            "RESOURCE_PREFIX": resource_prefix,
+            "PROMETHEUS_RATE_DURATION": str(prometheus_rate_duration),
         })
 
 
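A note on RESOURCE_PREFIX: data_utils.rand_name() appends a random suffix to
the given name, and per the NOTE above heat derives the autoscaled server
names from the tail of the stack name, hence the last-7-characters slice.
A sketch (the exact suffix is random per run):

    from tempest.lib.common.utils import data_utils

    stack_name = data_utils.rand_name('telemetry')  # e.g. 'telemetry-1638462'
    resource_prefix = stack_name[-7:]               # e.g. '1638462'
    # The stack's servers are then matched in PromQL via
    # resource_name=~"te-<resource_prefix>.*" (see create_stack.json above).
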
diff --git a/telemetry_tempest_plugin/scenario/utils.py b/telemetry_tempest_plugin/scenario/utils.py
index 0904160..9be8fe1 100644
--- a/telemetry_tempest_plugin/scenario/utils.py
+++ b/telemetry_tempest_plugin/scenario/utils.py
@@ -38,7 +38,7 @@
         host='example.com', port=None,
         fixture_module=None,
         intercept=None,
-        handlers=runner.initialize_handlers([]),
+        handlers=runner.initialize_handlers([], []),
         test_loader_name="tempest")
 
     # NOTE(sileht): We hide stdout/stderr and reraise the failure