Merge "Add monitoring of the swap usage"
diff --git a/linux/files/grafana_dashboards/system_prometheus.json b/linux/files/grafana_dashboards/system_prometheus.json
old mode 100644
new mode 100755
index 735155b..da11d16
--- a/linux/files/grafana_dashboards/system_prometheus.json
+++ b/linux/files/grafana_dashboards/system_prometheus.json
@@ -98,6 +98,7 @@
           "dashes": false,
           "datasource": null,
           "fill": 1,
+          "height": "",
           "id": 1,
           "legend": {
             "avg": false,
@@ -352,7 +353,19 @@
               "show": true
             }
           ]
-        },
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": true,
+      "title": "General",
+      "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 250,
+      "panels": [
         {
           "aliasColors": {},
           "bars": false,
@@ -652,7 +665,7 @@
       "repeatIteration": null,
       "repeatRowId": null,
       "showTitle": true,
-      "title": "General",
+      "title": "Processes",
       "titleSize": "h6"
     },
     {
@@ -675,7 +688,7 @@
             "minValue": 0,
             "show": true,
             "thresholdLabels": false,
-            "thresholdMarkers": true
+            "thresholdMarkers": false
           },
           "hideTimeOverride": false,
           "id": 11,
@@ -753,7 +766,7 @@
             "minValue": 0,
             "show": true,
             "thresholdLabels": false,
-            "thresholdMarkers": true
+            "thresholdMarkers": false
           },
           "hideTimeOverride": false,
           "id": 12,
@@ -1535,9 +1548,269 @@
       "showTitle": true,
       "title": "Network",
       "titleSize": "h6"
+    },
+    {
+      "collapse": false,
+      "height": 250,
+      "panels": [
+        {
+          "cacheTimeout": null,
+          "colorBackground": false,
+          "colorValue": false,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "datasource": null,
+          "format": "percent",
+          "gauge": {
+            "maxValue": 100,
+            "minValue": 0,
+            "show": true,
+            "thresholdLabels": false,
+            "thresholdMarkers": false
+          },
+          "id": 18,
+          "interval": null,
+          "links": [],
+          "mappingType": 1,
+          "mappingTypes": [
+            {
+              "name": "value to text",
+              "value": 1
+            },
+            {
+              "name": "range to text",
+              "value": 2
+            }
+          ],
+          "maxDataPoints": 100,
+          "nullPointMode": "connected",
+          "nullText": null,
+          "postfix": "",
+          "postfixFontSize": "50%",
+          "prefix": "",
+          "prefixFontSize": "50%",
+          "rangeMaps": [
+            {
+              "from": "null",
+              "text": "N/A",
+              "to": "null"
+            }
+          ],
+          "span": 2,
+          "sparkline": {
+            "fillColor": "rgba(31, 118, 189, 0.18)",
+            "full": false,
+            "lineColor": "rgb(31, 120, 193)",
+            "show": false
+          },
+          "tableColumn": "",
+          "targets": [
+            {
+              "expr": "swap_used_percent{host=\"$host\"}",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "refId": "A",
+              "step": 60
+            }
+          ],
+          "thresholds": "",
+          "title": "Used",
+          "type": "singlestat",
+          "valueFontSize": "80%",
+          "valueMaps": [
+            {
+              "op": "=",
+              "text": "N/A",
+              "value": "null"
+            }
+          ],
+          "valueName": "current"
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "decimals": null,
+          "fill": 0,
+          "id": 17,
+          "legend": {
+            "alignAsTable": false,
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "rightSide": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 5,
+          "stack": true,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "swap_used{host=\"$host\"}",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "used",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "swap_free{host=\"$host\"}",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "free",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "Usage",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "bytes",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": "0",
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        },
+        {
+          "aliasColors": {},
+          "bars": false,
+          "dashLength": 10,
+          "dashes": false,
+          "datasource": null,
+          "fill": 0,
+          "id": 19,
+          "legend": {
+            "avg": false,
+            "current": false,
+            "max": false,
+            "min": false,
+            "show": true,
+            "total": false,
+            "values": false
+          },
+          "lines": true,
+          "linewidth": 1,
+          "links": [],
+          "nullPointMode": "null",
+          "percentage": false,
+          "pointradius": 5,
+          "points": false,
+          "renderer": "flot",
+          "seriesOverrides": [],
+          "spaceLength": 10,
+          "span": 5,
+          "stack": false,
+          "steppedLine": false,
+          "targets": [
+            {
+              "expr": "irate(swap_in{host=\"$host\"}[5m])",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "in",
+              "refId": "A",
+              "step": 10
+            },
+            {
+              "expr": "irate(swap_out{host=\"$host\"}[5m])",
+              "format": "time_series",
+              "intervalFactor": 2,
+              "legendFormat": "out",
+              "refId": "B",
+              "step": 10
+            }
+          ],
+          "thresholds": [],
+          "timeFrom": null,
+          "timeShift": null,
+          "title": "I/O",
+          "tooltip": {
+            "shared": true,
+            "sort": 0,
+            "value_type": "individual"
+          },
+          "type": "graph",
+          "xaxis": {
+            "buckets": null,
+            "mode": "time",
+            "name": null,
+            "show": true,
+            "values": []
+          },
+          "yaxes": [
+            {
+              "format": "Bps",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": "0",
+              "show": true
+            },
+            {
+              "format": "short",
+              "label": null,
+              "logBase": 1,
+              "max": null,
+              "min": null,
+              "show": true
+            }
+          ]
+        }
+      ],
+      "repeat": null,
+      "repeatIteration": null,
+      "repeatRowId": null,
+      "showTitle": true,
+      "title": "Swap",
+      "titleSize": "h6"
     }
   ],
   "schemaVersion": 14,
+  "sharedCrosshair": true,
   "style": "dark",
   "tags": [],
   "templating": {
@@ -1554,6 +1827,7 @@
         "options": [],
         "query": "label_values(cpu_usage_idle,host)",
         "refresh": 1,
+        "refresh_on_load": true,
         "regex": "",
         "sort": 1,
         "tagValuesQuery": "",
@@ -1574,6 +1848,7 @@
         "options": [],
         "query": "query_result(diskio_read_bytes{host=\"$host\"})",
         "refresh": 1,
+        "refresh_on_load": true,
         "regex": "/name=\"([^\"]+)/",
         "sort": 1,
         "tagValuesQuery": "",
@@ -1594,6 +1869,7 @@
         "options": [],
         "query": "query_result(disk_free{host=\"$host\"})",
         "refresh": 1,
+        "refresh_on_load": true,
         "regex": "/path=\"([^\"]+)/",
         "sort": 1,
         "tagValuesQuery": "",
@@ -1614,6 +1890,7 @@
         "options": [],
         "query": "query_result(net_bytes_recv{host=\"$host\"})",
         "refresh": 1,
+        "refresh_on_load": true,
         "regex": "/interface=\"([^\"]+)/",
         "sort": 1,
         "tagValuesQuery": "",
@@ -1655,5 +1932,5 @@
   },
   "timezone": "browser",
   "title": "System",
-  "version": 31
+  "version": 32
 }
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index 485d4c0..771e3fe 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -70,3 +70,33 @@
       annotations:
         summary: 'Too many transmitted packets dropped on {{ $labels.host }} for interface {{ $labels.interface }}'
         description: 'The average number of transmitted packets which are dropped is too high on node {{ $labels.host }} for interface {{ $label.interface }} (current value={{ $value }}, threshold={% endraw %}{{ net_tx_dropped_threshold }})'
+    SystemSwapUsed:
+      {%- set swap_used_threshold = prometheus_server.get('alert', {}).get('SystemSwapUsed', {}).get('var', {}).get('threshold', 80) %}
+      if: avg_over_time(swap_used_percent[1m]) > {{ swap_used_threshold }}
+      {% raw %}
+      labels:
+        severity: warning
+        service: system
+      annotations:
+        summary: 'Swap usage too high on {{ $labels.host }}'
+        description: 'The average percentage of used swap is too high on node {{ $labels.host }} (current value={{ $value }}%, threshold={% endraw %}{{ swap_used_threshold }}%)'
+    SystemSwapIn:
+      {%- set swap_in_threshold = prometheus_server.get('alert', {}).get('SystemSwapIn', {}).get('var', {}).get('threshold', 1024 * 1024) %}
+      if: rate(swap_in[2m]) > {{ swap_in_threshold }}
+      {% raw %}
+      labels:
+        severity: warning
+        service: system
+      annotations:
+        summary: 'Swap input throughput too high on {{ $labels.host }}'
+        description: 'The rate of swap input bytes is too high on node {{ $labels.host }} (current value={{ $value }}B/s, threshold={% endraw %}{{ swap_in_threshold }})'
+    SystemSwapOut:
+      {%- set swap_out_threshold = prometheus_server.get('alert', {}).get('SystemSwapOut', {}).get('var', {}).get('threshold', 1024 * 1024) %}
+      if: rate(swap_out[2m]) > {{ swap_out_threshold }}
+      {% raw %}
+      labels:
+        severity: warning
+        service: system
+      annotations:
+        summary: 'Swap output throughput too high on {{ $labels.host }}'
+        description: 'The rate of swap output bytes is too high on node {{ $labels.host }} (current value={{ $value }}B/s, threshold={% endraw %}{{ swap_out_threshold }})'
diff --git a/linux/meta/telegraf.yml b/linux/meta/telegraf.yml
index bc689ae..e9d604f 100644
--- a/linux/meta/telegraf.yml
+++ b/linux/meta/telegraf.yml
@@ -9,4 +9,5 @@
     net:
     mem:
     processes:
+    swap:
     system: