Use server_group in Prom autoscaling scenario

Thanks to recent developments in ceilometer and sg-core, we can use
server_group to group instances from the same stack for autoscaling
purposes. This is how instances are grouped in the gnocchi-based
autoscaling scenario. It is much easier for users to configure and
should be the preferred option when using autoscaling.
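
For illustration, with this grouping the alarm query ends up looking
roughly like the following (the 30s rate window is only an example;
the real value is ceilometer_polling_interval plus
prometheus_scrape_interval):

    (rate(ceilometer_cpu{server_group=~'<stack id>'}[30s])) * 100

The stack id is filled in by Heat's str_replace from the OS::stack_id
pseudo parameter, and the server_group label on the metric comes from
the metering.server_group instance metadata.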

For backwards compatibility with the current stable branches, I added
an "autoscaling_instance_grouping" config option. The old way of
instance grouping ("prefix") is used by default, so tempest tests
will keep working on stable branches. Setting the option to
"metadata" switches to the new grouping. I'll set this option in the
.zuul.yaml of all telemetry repositories on master branches in
follow-up patches.
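
For example, a job that wants the new behaviour would set something
like this in tempest.conf (the option is registered under the
existing [telemetry] group):

    [telemetry]
    autoscaling_instance_grouping = metadata
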
Change-Id: I2770e9d47b914941f938f63d92ab7868fe09d7b9
diff --git a/telemetry_tempest_plugin/config.py b/telemetry_tempest_plugin/config.py
index a478158..68e47ce 100644
--- a/telemetry_tempest_plugin/config.py
+++ b/telemetry_tempest_plugin/config.py
@@ -96,7 +96,15 @@
    cfg.IntOpt('prometheus_scrape_interval',
               default=15,
               help="Scrape interval configured for prometheus. This can "
-                    "be used in test cases to properly configure autoscaling")
+                    "be used in test cases to properly configure autoscaling"),
+    cfg.StrOpt('autoscaling_instance_grouping',
+               default='prefix',
+               choices=['prefix', 'metadata'],
+               help="How to group instances for autoscaling testing. "
+                    "'prefix' relies on the instances having a common string "
+                    "at the start of their name. 'metadata' is a new and "
+                    "preferred way of grouping since 2024.2 relying on "
+                    "metering.server_group instance metadata")
]
telemetry_services_opts = [
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
index b66ae40..158cbde 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/autoscaling.yaml
@@ -57,22 +57,6 @@
$.servers[1].status: ACTIVE
$.servers.`len`: 2
- - name: check prometheus query for the servers count .
- desc: Check the Prometheus metric for the existence of servers
- url: $ENVIRON['PROMETHEUS_SERVICE_URL']/api/v1/query
- verbose: all
- method: POST
- request_headers:
- content-type: application/x-www-form-urlencoded
- data:
- query=ceilometer_cpu{resource_name=~"te-$ENVIRON['RESOURCE_PREFIX'].*"}
- poll:
- count: 300
- delay: 1
- status: 200
- response_json_paths:
- $.data.result.`len`: 2
-
- name: check alarm cpu_alarm_high ALARM
verbose: all
desc: Check the aodh alarm and its state
diff --git a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
index 036e5fb..32a8219 100644
--- a/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
+++ b/telemetry_tempest_plugin/scenario/telemetry_integration_prometheus_gabbits/create_stack.json
@@ -54,7 +54,7 @@
}
}
],
- "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+ "query": $ENVIRON["QUERY"]
}
},
"web_server_scaledown_policy": {
@@ -82,7 +82,7 @@
}
}
],
- "query": "(rate(ceilometer_cpu{resource_name=~'te-$ENVIRON['RESOURCE_PREFIX'].*'}[$ENVIRON['PROMETHEUS_RATE_DURATION']s])) * 100"
+ "query": $ENVIRON["QUERY"]
}
}
}
diff --git a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
index 9c13b68..122a3f9 100644
--- a/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
+++ b/telemetry_tempest_plugin/scenario/test_telemetry_integration_prometheus.py
@@ -104,6 +104,28 @@
        super(PrometheusGabbiTest, cls).resource_cleanup()
+    def _prep_query(self, prometheus_rate_duration, resource_prefix):
+        if config.CONF.telemetry.autoscaling_instance_grouping == "metadata":
+            query = ("\"(rate(ceilometer_cpu{{server_group=~'stack_id'}}"
+                     "[{}s])) * 100\"").format(prometheus_rate_duration)
+            metadata_query = '''
+            {{
+                "str_replace": {{
+                    "template": {},
+                    "params": {{
+                        "stack_id": {{ "get_param": "OS::stack_id" }}
+                    }}
+                }}
+            }}
+            '''.format(query)
+            return metadata_query
+
+        else:
+            prefix_query = '''
+            "(rate(ceilometer_cpu{{resource_name=~'te-{}.*'}}[{}s])) * 100"
+            '''.format(resource_prefix, prometheus_rate_duration)
+            return prefix_query
+
    def _prep_test(self, filename):
        auth = self.os_primary.auth_provider.get_auth()
        networks = self.os_primary.networks_client.list_networks(
@@ -115,6 +137,7 @@
        prometheus_rate_duration = (
            config.CONF.telemetry.ceilometer_polling_interval
            + config.CONF.telemetry.prometheus_scrape_interval)
+        query = self._prep_query(prometheus_rate_duration, resource_prefix)
        os.environ.update({
            "USER_TOKEN": auth[0],
            "AODH_THRESHOLD": str(config.CONF.telemetry.alarm_threshold),
@@ -136,6 +159,7 @@
"RESOURCE_PREFIX": resource_prefix,
"PROMETHEUS_RATE_DURATION": str(prometheus_rate_duration),
"LOAD_LENGTH": str(prometheus_rate_duration * 2),
+ "QUERY": query,
})