Change some default values for Rabbit driver
It was observed that with rabbitmq-server version 3.8.2 the default
values cause rapid connection recreation during failover of one of the
rabbit nodes. In some cases this leads to the creation of broken exchanges
and hangs of OpenStack operations. Changing rabbit_retry_interval to 5,
rabbit_retry_backoff to 10 and kombu_reconnect_delay to 5.0 fixes the
issue.
This change is Pike-only; the Queens counterpart is implemented in the
oslo-templates formula.
Related-Issue: PROD-34332
Change-Id: Id06a64975af581c1fe4e8cd1ae7bf46f94df0858
diff --git a/cinder/files/pike/cinder.conf.controller.Debian b/cinder/files/pike/cinder.conf.controller.Debian
index 4cd39ae..b2115cd 100644
--- a/cinder/files/pike/cinder.conf.controller.Debian
+++ b/cinder/files/pike/cinder.conf.controller.Debian
@@ -196,6 +196,24 @@
{%- endif %}
{%- endif %}
+# NOTE(pas-ha) the default values of the options below are problematic with
+# RMQ 3.8, see PROD-34322.
+# Recreating queues on a secondary broker immediately after the primary broker
+# has gone down leads to these queues being non-functional.
+
+# How long to wait before reconnecting in response to an AMQP consumer
+# cancel notification. (floating point value)
+#kombu_reconnect_delay = 1.0
+kombu_reconnect_delay = 5.0
+
+# How frequently to retry connecting with RabbitMQ. (integer value)
+#rabbit_retry_interval = 1
+rabbit_retry_interval = 5
+
+# How long to backoff for between retries when connecting to RabbitMQ.
+# (integer value)
+#rabbit_retry_backoff = 2
+rabbit_retry_backoff = 10
[keystone_authtoken]
signing_dir=/tmp/keystone-signing-cinder
diff --git a/cinder/files/pike/cinder.conf.volume.Debian b/cinder/files/pike/cinder.conf.volume.Debian
index a06fd2e..4900816 100644
--- a/cinder/files/pike/cinder.conf.volume.Debian
+++ b/cinder/files/pike/cinder.conf.volume.Debian
@@ -180,6 +180,25 @@
{%- endif %}
{%- endif %}
+# NOTE(pas-ha) the default values of the options below are problematic with
+# RMQ 3.8, see PROD-34322.
+# Recreating queues on a secondary broker immediately after the primary broker
+# has gone down leads to these queues being non-functional.
+
+# How long to wait before reconnecting in response to an AMQP consumer
+# cancel notification. (floating point value)
+#kombu_reconnect_delay = 1.0
+kombu_reconnect_delay = 5.0
+
+# How frequently to retry connecting with RabbitMQ. (integer value)
+#rabbit_retry_interval = 1
+rabbit_retry_interval = 5
+
+# How long to backoff for between retries when connecting to RabbitMQ.
+# (integer value)
+#rabbit_retry_backoff = 2
+rabbit_retry_backoff = 10
+
[keystone_authtoken]
signing_dir=/tmp/keystone-signing-cinder
revocation_cache_time = 10