Merge "cleaning metrics in legacy monitoring - removed latency metrics, fixed pool usage alerts"
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index 9af58c2..ec0200b 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -70,32 +70,6 @@
annotations:
summary: "{%-endraw %}{{100*threshold}}{%- raw %}% of Ceph space is used"
description: "{{ $ value }} bytes of Ceph OSD space (>= {%-endraw %}{{100*threshold}}{%- raw %}%) is used for 3 minutes. For details, run 'ceph df'."
- CephServiceApplyLatencyTooHigh:
- {%- endraw %}
- {%- set threshold = monitoring.apply_latency_threshold|default('0.007')|float %}
- if: >-
- avg(ceph_apply_latency_sum) / avg(ceph_apply_latency_avgcount) > {{threshold}}
- {%- raw %}
- for: 3m
- labels:
- severity: warning
- service: ceph
- annotations:
- summary: "Ceph apply latency reached the limit of {%- endraw %}{{threshold}}{%- raw %}s"
- description: "The average Ceph apply latency is more than {%- endraw %}{{threshold}}{%- raw %} seconds for 3 minutes."
- CephServiceCommitLatencyTooHigh:
- {%- endraw %}
- {%- set threshold = monitoring.commit_latency_threshold|default('0.7')|float %}
- if: >-
- avg(ceph_commit_latency_sum) / avg(ceph_commitcycle_latency_avgcount) > {{threshold}}
- {%- raw %}
- for: 3m
- labels:
- severity: warning
- service: ceph
- annotations:
- summary: "Ceph commit latency reached the limit of {%- endraw %}{{threshold}}{%- raw %}s"
- description: "The average Ceph commit latency is more than {%- endraw %}{{threshold}}{%- raw %} seconds for 3 minutes."
{% endraw %}
{%- if setup.pool is defined %}
{%- for pool_name, pool in setup.pool.iteritems() %}
@@ -115,10 +89,10 @@
annotations:
summary: "{{100*threshold}}% of Ceph pool space is used"
description: "The Ceph {{pool_name}} pool uses {{100*threshold}}% of available space for 3 minutes. For details, run 'ceph df'."
- CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageMinor:
+ CephPool{{pool_name|replace(".", "")|replace("-", "")}}SpaceUsageCritical:
{%- set threshold = monitoring_pool.pool_space_used_critical_threshold|default('0.85')|float %}
if: >-
- ceph_pool_usage_bytes_used{name="{{pool_name}}"} / ceph_pool_usage_max_avail{name="{{pool_name}}"} > {{threshold}}
+ ceph_pool_usage_bytes_used{name="{{pool_name}}"} / (ceph_pool_usage_max_avail{name="{{pool_name}}"} + ceph_pool_usage_bytes_used{name="{{pool_name}}"}) > {{threshold}}
for: 3m
labels:
severity: minor