Updated defaults for the mon_max_pg_per_osd variable.
Added additional alerts for the number of PGs on OSDs.
Change-Id: I5042a166c6c81923c630d05bd7e2499226a707d6
Related-Prod: PROD-26472
diff --git a/ceph/map.jinja b/ceph/map.jinja
index 3140a4c..b2fde96 100644
--- a/ceph/map.jinja
+++ b/ceph/map.jinja
@@ -87,6 +87,8 @@
default:
cluster_stats: {}
node_stats: {}
+ osd_pgnum_warning: 200
+ osd_pgnum_critical: 300
{%- endload %}
{% set monitoring = salt['grains.filter_by'](monitoring_defaults, merge=salt['pillar.get']('ceph:monitoring')) %}
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index 86c8a47..d29c409 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -225,6 +225,32 @@
annotations:
summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
description: "{{ $value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+ CephOsdPgNumTooHighWarning:
+ {%- endraw %}
+ {%- set threshold = monitoring.osd_pgnum_warning %}
+ if: >-
+ max(ceph_osd_numpg) > {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: warning
+ service: ceph
+ annotations:
+ summary: "Some OSDs have more than {% endraw %}{{threshold}}{% raw %} PGs"
+ description: "Some OSDs contain more than {% endraw %}{{threshold}}{% raw %} PGs. This may have a negative impact on the cluster performance. For details, run 'ceph pg dump'"
+ CephOsdPgNumTooHighCritical:
+ {%- endraw %}
+ {%- set threshold = monitoring.osd_pgnum_critical %}
+ if: >-
+ max(ceph_osd_numpg) > {{threshold}}
+ {%- raw %}
+ for: 3m
+ labels:
+ severity: critical
+ service: ceph
+ annotations:
+ summary: "Some OSDs have more than {% endraw %}{{threshold}}{% raw %} PGs"
+ description: "Some OSDs contain more than {% endraw %}{{threshold}}{% raw %} PGs. This may have a negative impact on the cluster performance. For details, run 'ceph pg dump'"
{%- endraw %}
{%- if setup.pool is defined %}
{%- for pool_name, pool in setup.pool.iteritems() %}
diff --git a/metadata/service/mon/cluster.yml b/metadata/service/mon/cluster.yml
index 6a10da2..94c484b 100644
--- a/metadata/service/mon/cluster.yml
+++ b/metadata/service/mon/cluster.yml
@@ -7,3 +7,7 @@
ceph:
mon:
enabled: true
+ common:
+ config:
+ mon:
+ mon_max_pg_per_osd: 600
diff --git a/metadata/service/mon/single.yml b/metadata/service/mon/single.yml
index 8be5da5..ea04ea0 100644
--- a/metadata/service/mon/single.yml
+++ b/metadata/service/mon/single.yml
@@ -12,6 +12,9 @@
caps:
mon: "allow *"
common:
+ config:
+ mon:
+ mon_max_pg_per_osd: 600
keyring:
admin:
caps: