Updated defaults for the mon_max_pg_per_osd variable.
Added warning and critical alerts for the number of PGs on OSDs.

Change-Id: I5042a166c6c81923c630d05bd7e2499226a707d6
Related-Prod: PROD-26472
diff --git a/ceph/map.jinja b/ceph/map.jinja
index 3140a4c..b2fde96 100644
--- a/ceph/map.jinja
+++ b/ceph/map.jinja
@@ -87,6 +87,8 @@
 default:
   cluster_stats: {}
   node_stats: {}
+  osd_pgnum_warning: 200
+  osd_pgnum_critical: 300
 {%- endload %}
 
 {% set monitoring = salt['grains.filter_by'](monitoring_defaults, merge=salt['pillar.get']('ceph:monitoring')) %}
diff --git a/ceph/meta/prometheus.yml b/ceph/meta/prometheus.yml
index 86c8a47..d29c409 100644
--- a/ceph/meta/prometheus.yml
+++ b/ceph/meta/prometheus.yml
@@ -225,6 +225,32 @@
       annotations:
         summary: "{% endraw %}{{100*threshold}}{% raw %}% of Ceph space is used"
         description: "{{ $ value }} bytes of Ceph OSD space (>={% endraw %}{{100*threshold}}{% raw %}%) is used for 3 minutes. For details, run 'ceph df'."
+    CephOsdPgNumTooHighWarning:
+      {%- endraw %}
+      {%- set threshold = monitoring.osd_pgnum_warning %}
+      if: >-
+        max(ceph_osd_numpg) > {{threshold}}
+      {%- raw %}
+      for: 3m
+      labels:
+        severity: warning
+        service: ceph
+      annotations:
+        summary: "Some OSDs have more than {% endraw %}{{threshold}}{% raw %} PGs"
+        description: "Some OSDs contain more than {% endraw %}{{threshold}}{% raw %} PGs. This may have a negative impact on the cluster performance. For details, run 'ceph pg dump'"
+    CephOsdPgNumTooHighCritical:
+      {%- endraw %}
+      {%- set threshold = monitoring.osd_pgnum_critical %}
+      if: >-
+        max(ceph_osd_numpg) > {{threshold}}
+      {%- raw %}
+      for: 3m
+      labels:
+        severity: critical
+        service: ceph
+      annotations:
+        summary: "Some OSDs have more than {% endraw %}{{threshold}}{% raw %} PGs"
+        description: "Some OSDs contain more than {% endraw %}{{threshold}}{% raw %} PGs. This may have a negative impact on the cluster performance. For details, run 'ceph pg dump'"
       {%- endraw %}
       {%- if setup.pool is defined %}
         {%- for pool_name, pool in setup.pool.iteritems() %}
diff --git a/metadata/service/mon/cluster.yml b/metadata/service/mon/cluster.yml
index 6a10da2..94c484b 100644
--- a/metadata/service/mon/cluster.yml
+++ b/metadata/service/mon/cluster.yml
@@ -7,3 +7,7 @@
   ceph:
     mon:
       enabled: true
+    common:
+      config:
+        mon:
+          mon_max_pg_per_osd: 600
diff --git a/metadata/service/mon/single.yml b/metadata/service/mon/single.yml
index 8be5da5..ea04ea0 100644
--- a/metadata/service/mon/single.yml
+++ b/metadata/service/mon/single.yml
@@ -12,6 +12,9 @@
           caps:
             mon: "allow *"
     common:
+      config:
+        mon:
+          mon_max_pg_per_osd: 600
       keyring:
         admin:
           caps: