Use server_group in Prom autoscaling scenario

Thanks to recent developments in ceilometer and sg-core, we can use
server_group to group instances from the same stack for autoscaling
purposes. This is how instances are grouped in the gnocchi-based
autoscaling scenario. It is much easier for users to configure and
should be the preferred option when using autoscaling.
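
For illustration, with this grouping the alarm query ends up looking
roughly like the following (the 30s rate window is only an example;
the real value is ceilometer_polling_interval plus
prometheus_scrape_interval):

    (rate(ceilometer_cpu{server_group=~'<stack id>'}[30s])) * 100

The stack id is filled in by Heat's str_replace from the OS::stack_id
pseudo parameter, and the server_group label on the metric comes from
the metering.server_group instance metadata.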

For backwards compatibility with the current stable branches, I added
an "autoscaling_instance_grouping" config option. The old way of
instance grouping ("prefix") is used by default, so tempest tests
will keep working on stable branches. Setting the option to
"metadata" switches to the new grouping. I'll set this option in the
.zuul.yaml of all telemetry repositories on master branches in
follow-up patches.
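
For example, a job that wants the new behaviour would set something
like this in tempest.conf (the option is registered under the
existing [telemetry] group):

    [telemetry]
    autoscaling_instance_grouping = metadata
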
Change-Id: I2770e9d47b914941f938f63d92ab7868fe09d7b9
diff --git a/telemetry_tempest_plugin/config.py b/telemetry_tempest_plugin/config.py
index a478158..68e47ce 100644
--- a/telemetry_tempest_plugin/config.py
+++ b/telemetry_tempest_plugin/config.py
@@ -96,7 +96,15 @@
    cfg.IntOpt('prometheus_scrape_interval',
               default=15,
               help="Scrape interval configured for prometheus. This can "
-                    "be used in test cases to properly configure autoscaling")
+                    "be used in test cases to properly configure autoscaling"),
+    cfg.StrOpt('autoscaling_instance_grouping',
+               default='prefix',
+               choices=['prefix', 'metadata'],
+               help="How to group instances for autoscaling testing. "
+                    "'prefix' relies on the instances having a common string "
+                    "at the start of their name. 'metadata' is a new and "
+                    "preferred way of grouping since 2024.2 relying on "
+                    "metering.server_group instance metadata")
]
telemetry_services_opts = [
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
index b66ae40..158cbde 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
@@ -57,22 +57,6 @@
$.servers[1].status: ACTIVE
$.servers.`len`: 2
- - name: check prometheus query for the servers count .
- desc: Check the Prometheus metric for the existence of servers
- url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
- verbose: all
- method: POST
- request_headers:
- content-type: application/x-www-form-urlencoded
- data:
- query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
- poll:
- count: 300
- delay: 1
- status: 200
- response_json_paths:
- $.data.result.`len`: 2
-
- name: check alarm cpu_alarm_high ALARM
verbose: all
desc: Check the aodh alarm and its state
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
index 036e5fb..32a8219 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
@@ -54,7 +54,7 @@
}
}
],
- "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+ "query": $ENVIRON["QUERY"]
}
},
"web_server_scaledown_policy": {
@@ -82,7 +82,7 @@
}
}
],
- "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+ "query": $ENVIRON["QUERY"]
}
}
}
diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
index 9c13b68..122a3f9 100644
--- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
+++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
@@ -104,6 +104,28 @@
        super(PrometheusGabbiTest, cls).resource_cleanup()
+    def _prep_query(self, prometheus_rate_duration, resource_prefix):
+        if config.CONF.telemetry.autoscaling_instance_grouping == "metadata":
+            query = ("\"(rate(ceilometer_cpu{{server_group=~'stack_id'}}"
+                     "[{}s])) * 100\"").format(prometheus_rate_duration)
+            metadata_query = '''
+            {{
+                "str_replace": {{
+                    "template": {},
+                    "params": {{
+                        "stack_id": {{ "get_param": "OS::stack_id" }}
+                    }}
+                }}
+            }}
+            '''.format(query)
+            return metadata_query
+
+        else:
+            prefix_query = '''
+            "(rate(ceilometer_cpu{{resource_name=~'te-{}.*'}}[{}s])) * 100"
+            '''.format(resource_prefix, prometheus_rate_duration)
+            return prefix_query
+
    def _prep_test(self, filename):
        auth = self.os_primary.auth_provider.get_auth()
        networks = self.os_primary.networks_client.list_networks(
@@ -115,6 +137,7 @@
        prometheus_rate_duration = (
            config.CONF.telemetry.ceilometer_polling_interval
            + config.CONF.telemetry.prometheus_scrape_interval)
+        query = self._prep_query(prometheus_rate_duration, resource_prefix)
        os.environ.update({
            "USER_TOKEN": auth[0],
            "AODH_THRESHOLD": str(config.CONF.telemetry.alarm_threshold),
@@ -136,6 +159,7 @@
"RESOURCE_PREFIX": resource_prefix,
"PROMETHEUS_RATE_DURATION": str(prometheus_rate_duration),
"LOAD_LENGTH": str(prometheus_rate_duration * 2),
+ "QUERY": query,
})