Add Autoscaling test with prometheus
Change-Id: I74a897a213167da8b4bde99cc879a7078993ccd4
diff --git a/.zuul.yaml b/.zuul.yaml
index 5a821b6..9e2cee5 100644
--- a/.zuul.yaml
+++ b/.zuul.yaml
@@ -32,7 +32,7 @@
sg-core: https://github.com/infrawatch/sg-core
# NOTE(jokke): The following will disable the gabbi based integration tests for now.
# We will need to figure out how we refactor them to be stable in the CI.
- tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration)
+ tempest_exclude_regex: (^telemetry_tempest_plugin\.scenario\.test_telemetry_integration\.)
devstack_services:
tempest: true
devstack_localrc:
@@ -47,6 +47,7 @@
# be "gnocchi,sg-core"
CEILOMETER_BACKEND: "gnocchi"
CEILOMETER_BACKENDS: "gnocchi,sg-core"
+ PROMETHEUS_SERVICE_SCRAPE_TARGETS: "sg-core"
CEILOMETER_PIPELINE_INTERVAL: 15
CEILOMETER_ALARM_THRESHOLD: 6000000000
GLOBAL_VENV: False
diff --git a/telemetry_tempest_plugin/config.py b/telemetry_tempest_plugin/config.py
index ed1017a..8ff90aa 100644
--- a/telemetry_tempest_plugin/config.py
+++ b/telemetry_tempest_plugin/config.py
@@ -76,6 +76,9 @@
cfg.IntOpt('alarm_threshold',
default=10,
help="Threshold to cross for the alarm to trigger."),
+ cfg.IntOpt('scaledown_alarm_threshold',
+ default=2000000000,
+               help="Threshold to cross for the scale-down alarm to trigger."),
cfg.BoolOpt("disable_ssl_certificate_validation",
default=False,
help="Disable SSL certificate validation when running "
@@ -83,6 +86,9 @@
cfg.StrOpt('sg_core_service_url',
default="127.0.0.1:3000",
help="URL to sg-core prometheus endpoint"),
+ cfg.StrOpt('prometheus_service_url',
+ default="127.0.0.1:9090",
+ help="URL to prometheus endpoint"),
cfg.IntOpt('ceilometer_polling_interval',
default=300,
help="Polling interval configured for ceilometer. This can "
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
new file mode 100644
index 0000000..abe0293
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
@@ -0,0 +1,162 @@
+defaults:
+ request_headers:
+ x-auth-token: $ENVIRON['USER_TOKEN']
+
+tests:
+ - name: list alarms none
+ desc: Lists alarms, none yet exist
+ verbose: all
+ url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+ method: GET
+ response_strings:
+ - "[]"
+
+ - name: list servers none
+    desc: List servers, none exist yet
+ verbose: all
+ url: $ENVIRON['NOVA_SERVICE_URL']/servers
+ method: GET
+ response_strings:
+ - "[]"
+
+ - name: create stack
+ desc: Create an autoscaling stack
+ verbose: all
+ url: $ENVIRON['HEAT_SERVICE_URL']/stacks
+ method: POST
+ request_headers:
+ content-type: application/json
+ data: <@create_stack.json
+ status: 201
+
+ - name: control stack status
+    desc: Check the stack has been created successfully
+ url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+ redirects: true
+ verbose: all
+ method: GET
+ status: 200
+ poll:
+ count: 300
+ delay: 1
+ response_json_paths:
+ $.stack.stack_status: "CREATE_COMPLETE"
+
+ - name: list servers grow
+ verbose: all
+    desc: Wait for the autoscaling stack to grow to two servers
+ url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+ method: GET
+ poll:
+ count: 600
+ delay: 1
+ response_json_paths:
+ $.servers[0].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+ $.servers[1].metadata.'metering.server_group': $RESPONSE['$.stack.id']
+ $.servers[0].status: ACTIVE
+ $.servers[1].status: ACTIVE
+ $.servers.`len`: 2
+
+  - name: check prometheus query for the servers count
+ desc: Check the Prometheus metric for the existence of servers
+ url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
+ verbose: all
+ method: POST
+ request_headers:
+ content-type: application/x-www-form-urlencoded
+ data:
+ query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
+ poll:
+ count: 300
+ delay: 1
+ status: 200
+ response_json_paths:
+ $.data.result.`len`: 2
+
+ - name: check alarm cpu_alarm_high ALARM
+ verbose: all
+ desc: Check the aodh alarm and its state
+ url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+ method: GET
+ poll:
+ count: 600
+ delay: 5
+ response_strings:
+ - "$ENVIRON['STACK_NAME']-cpu_alarm_high"
+ response_json_paths:
+ $[0].state: alarm
+
+ - name: check alarm cpu_alarm_high is OK
+ verbose: all
+ desc: Check the aodh alarm and its state
+ url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+ method: GET
+ poll:
+ count: 900
+ delay: 5
+ response_strings:
+ - "$ENVIRON['STACK_NAME']-cpu_alarm_high-"
+ response_json_paths:
+ $[0].state: ok
+
+ - name: check alarm cpu_alarm_low is ALARM
+ verbose: all
+ desc: Check the aodh alarm and its state
+ url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms?sort=name%3Aasc
+ method: GET
+ poll:
+ count: 600
+ delay: 5
+ response_strings:
+ - "$ENVIRON['STACK_NAME']-cpu_alarm_low-"
+ response_json_paths:
+ $[1].state: alarm
+
+ - name: list servers shrink
+ verbose: all
+ desc: Wait for the autoscaling stack to delete one server
+ url: $ENVIRON['NOVA_SERVICE_URL']/servers/detail
+ method: GET
+ poll:
+ count: 600
+ delay: 1
+ response_json_paths:
+ $.servers[0].metadata.'metering.server_group': $HISTORY['control stack status'].$RESPONSE['$.stack.id']
+ $.servers[0].status: ACTIVE
+ $.servers.`len`: 1
+
+ - name: get stack location
+ desc: Get the stack location
+ url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+ method: GET
+ status: 302
+
+ - name: delete stack
+ desc: Delete the stack
+ url: $LOCATION
+ method: DELETE
+ status: 204
+
+  - name: confirm that stack has been deleted
+    desc: Check the stack has been deleted before proceeding
+ url: $ENVIRON['HEAT_SERVICE_URL']/stacks/$ENVIRON['STACK_NAME']
+ redirects: true
+ method: GET
+ poll:
+ count: 600
+ delay: 5
+ status: 404
+
+ - name: list alarms deleted
+ desc: List alarms, no more exist
+ url: $ENVIRON['AODH_SERVICE_URL']/v2/alarms
+ method: GET
+ response_strings:
+ - "[]"
+
+ - name: list servers deleted
+    desc: List servers, none remain
+ url: $ENVIRON['NOVA_SERVICE_URL']/servers
+ method: GET
+ response_strings:
+ - "[]"
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
new file mode 100644
index 0000000..4f6962b
--- /dev/null
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
@@ -0,0 +1,90 @@
+{
+ "stack_name": "$ENVIRON['STACK_NAME']",
+ "template": {
+ "heat_template_version": "2013-05-23",
+ "description": "Integration Test AutoScaling with heat+ceilometer+prometheus+aodh",
+ "resources": {
+ "asg": {
+ "type": "OS::Heat::AutoScalingGroup",
+ "properties": {
+ "min_size": 1,
+ "max_size": 2,
+ "resource": {
+ "type": "OS::Nova::Server",
+ "properties": {
+ "networks": [{ "network": "$ENVIRON['NEUTRON_NETWORK']" }],
+ "flavor": "$ENVIRON['NOVA_FLAVOR_REF']",
+ "image": "$ENVIRON['GLANCE_IMAGE_NAME']",
+ "metadata": {
+ "metering.server_group": { "get_param": "OS::stack_id" }
+ },
+ "user_data_format": "RAW",
+ "user_data": {"Fn::Join": ["", [
+ "#!/bin/sh\n",
+ "echo 'Loading CPU'\n",
+ "set -v\n",
+ "cat /dev/urandom > /dev/null & sleep 120 ; kill $! \n"
+ ]]}
+ }
+ }
+ }
+ },
+ "web_server_scaleup_policy": {
+ "type": "OS::Heat::ScalingPolicy",
+ "properties": {
+ "adjustment_type": "change_in_capacity",
+ "auto_scaling_group_id": { "get_resource": "asg" },
+ "cooldown": 60,
+ "scaling_adjustment": 1
+ }
+ },
+ "cpu_alarm_high": {
+ "type": "OS::Aodh::PrometheusAlarm",
+ "properties": {
+ "description": "Scale-up if the mean CPU is higher than the threshold",
+ "threshold": $ENVIRON["AODH_THRESHOLD"],
+ "comparison_operator": "gt",
+ "alarm_actions": [
+ {
+ "str_replace": {
+ "template": "trust+url",
+ "params": {
+ "url": { "get_attr": [ "web_server_scaleup_policy", "signal_url" ] }
+ }
+ }
+ }
+ ],
+ "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
+ }
+ },
+ "web_server_scaledown_policy": {
+ "type": "OS::Heat::ScalingPolicy",
+ "properties": {
+ "adjustment_type": "change_in_capacity",
+ "auto_scaling_group_id": { "get_resource": "asg" },
+ "cooldown": 60,
+ "scaling_adjustment": -1
+ }
+ },
+ "cpu_alarm_low": {
+ "type": "OS::Aodh::PrometheusAlarm",
+ "properties": {
+ "description": "Scale-down if the mean CPU is lower than the threshold",
+ "threshold": $ENVIRON["SCALEDOWN_THRESHOLD"],
+ "comparison_operator": "lt",
+ "alarm_actions": [
+ {
+ "str_replace": {
+ "template": "trust+url",
+ "params": {
+ "url": { "get_attr": [ "web_server_scaledown_policy", "signal_url" ] }
+ }
+ }
+ }
+ ],
+ "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[1m])) * 100"
+ }
+ }
+ }
+ }
+}
diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
index d4dcc0e..93d05de 100644
--- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
+++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
@@ -13,11 +13,11 @@
import os
from tempest import config
+from tempest.lib.common.utils import data_utils
from tempest.scenario import manager
from telemetry_tempest_plugin.scenario import utils
-CONF = config.CONF
TEST_DIR = os.path.join(os.path.dirname(__file__),
'telemetry_integration_prometheus_gabbits')
@@ -31,19 +31,79 @@
@classmethod
def skip_checks(cls):
super(PrometheusGabbiTest, cls).skip_checks()
- for name in ["sg_core", "glance", "ceilometer"]:
- if not getattr(CONF.service_available, name, False):
- raise cls.skipException("%s support is required" %
- name.capitalize())
+ for name in ["aodh", "nova", "heat",
+ "ceilometer", "glance", "sg_core"]:
+ cls._check_service(name)
+
+ @classmethod
+ def _check_service(cls, name):
+ if not getattr(config.CONF.service_available, name, False):
+ raise cls.skipException("%s support is required" %
+ name.capitalize())
+
+ @staticmethod
+ def _get_endpoint(auth, service):
+ opt_section = getattr(config.CONF, service)
+ endpoint_type = opt_section.endpoint_type
+ is_keystone_v3 = 'catalog' in auth[1]
+
+ if is_keystone_v3:
+ if endpoint_type.endswith("URL"):
+ endpoint_type = endpoint_type[:-3]
+ catalog = auth[1]['catalog']
+ endpoints = [e['endpoints'] for e in catalog
+ if e['type'] == opt_section.catalog_type]
+ if not endpoints:
+ raise Exception("%s endpoint not found" %
+ opt_section.catalog_type)
+ endpoints = [e['url'] for e in endpoints[0]
+ if e['interface'] == endpoint_type]
+ if not endpoints:
+ raise Exception("%s interface not found for endpoint %s" %
+ (endpoint_type,
+ opt_section.catalog_type))
+ return endpoints[0].rstrip('/')
+
+ else:
+ if not endpoint_type.endswith("URL"):
+ endpoint_type += "URL"
+ catalog = auth[1]['serviceCatalog']
+ endpoints = [e for e in catalog
+ if e['type'] == opt_section.catalog_type]
+ if not endpoints:
+ raise Exception("%s endpoint not found" %
+ opt_section.catalog_type)
+ return endpoints[0]['endpoints'][0][endpoint_type].rstrip('/')
def _prep_test(self, filename):
+ auth = self.os_primary.auth_provider.get_auth()
+ networks = self.os_primary.networks_client.list_networks(
+ **{'router:external': False, 'fields': 'id'})['networks']
+ stack_name = data_utils.rand_name('telemetry')
+ # NOTE(marihan): This is being used in prometheus query as heat is
+ # using the last 7 digits from stack_name to create the autoscaling
+ # resources.
+ resource_prefix = stack_name[-7:]
os.environ.update({
+ "USER_TOKEN": auth[0],
+ "AODH_THRESHOLD": str(config.CONF.telemetry.alarm_threshold),
+ "SCALEDOWN_THRESHOLD":
+ str(config.CONF.telemetry.scaledown_alarm_threshold),
+ "AODH_SERVICE_URL": self._get_endpoint(auth, "alarming_plugin"),
+ "HEAT_SERVICE_URL": self._get_endpoint(auth, "heat_plugin"),
+ "NOVA_SERVICE_URL": self._get_endpoint(auth, "compute"),
"SG_CORE_SERVICE_URL":
- str(config.CONF.telemetry.sg_core_service_url),
+ config.CONF.telemetry.sg_core_service_url,
"CEILOMETER_POLLING_INTERVAL":
- str(CONF.telemetry.ceilometer_polling_interval),
+ str(config.CONF.telemetry.ceilometer_polling_interval),
+ "PROMETHEUS_SERVICE_URL":
+ config.CONF.telemetry.prometheus_service_url,
+ "GLANCE_IMAGE_NAME": self.image_create(),
+ "NOVA_FLAVOR_REF": config.CONF.compute.flavor_ref,
+ "NEUTRON_NETWORK": networks[0].get('id'),
+ "STACK_NAME": stack_name,
+ "RESOURCE_PREFIX": resource_prefix,
})
- self.image_create()
utils.generate_tests(PrometheusGabbiTest, TEST_DIR)