Add alerts for unreachable RadosGW endpoints
Change-Id: I38198e454e43309ae8b2507e0c57dfcaedf0da98
Related-Prod: PROD-35124
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index d2ef410..86c1aff 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -172,6 +172,26 @@
query: >-
rate(diskio_read_time[5m])
alert:
+ RadosGWOutage:  # Whole-service alert: evaluates all radosgw.* endpoint checks together.
+ if: >-  # true only when the largest value across every radosgw.* series is exactly 0
+ max(openstack_api_check_status{name=~"radosgw.*"}) == 0
+ labels:
+ severity: critical  # NOTE(review): presumably ranks above the "major" used by RadosGWDown; confirm
+ service: ceph
+ annotations:  # max() without a by-clause drops series labels, so no per-endpoint name is templated here
+ summary: "RadosGW outage"
+ description: >-
+ RadosGW is not accessible for all available RadosGW endpoints in the OpenStack service catalog.
+ RadosGWDown:  # Per-endpoint alert: matches each radosgw.* series whose check value is 0.
+ if: >-  # also matches whenever RadosGWOutage does, so both are raised unless inhibited elsewhere
+ openstack_api_check_status{name=~"radosgw.*"} == 0
+ labels:
+ severity: major
+ service: ceph
+ annotations:  # no aggregation in the expression, so the series' "name" label is available for templating
+ summary: "{{ $labels.name }} endpoint is not accessible"
+ description: >-
+ RadosGW is not accessible for the {{ $labels.name }} endpoint.
CephClusterHealthMinor:
if: >-
ceph_health_status == 1