Enable nstat input plugin for softnet_stat data

Now that nstat's telegraf plugin can collect data from
`/proc/net/softnet_stat` about dropped packets and
net_rx_action terminations (a.k.a. time squeeze), enable
it globally on all hosts.

Also update the grafana dashboard to include the new graphs and add
four new Prometheus alerts.

Related-Bug: PROD-21090

Change-Id: I9dfe87bdc8b677a51e3f305dd3c75c7d4cc4e0d4
diff --git a/linux/files/grafana_dashboards/system_network_prometheus.json b/linux/files/grafana_dashboards/system_network_prometheus.json
index 13c6827..b05c62e 100644
--- a/linux/files/grafana_dashboards/system_network_prometheus.json
+++ b/linux/files/grafana_dashboards/system_network_prometheus.json
@@ -19,7 +19,7 @@
   "gnetId": null,
   "graphTooltip": 1,
   "id": null,
-  "iteration": 1529498668709,
+  "iteration": 1532690906484,
   "links": [],
   "panels": [
     {
@@ -430,12 +430,561 @@
       }
     },
     {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "decimals": null,
+      "fill": 1,
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 0,
+        "y": 15
+      },
+      "id": 49,
+      "legend": {
+        "alignAsTable": true,
+        "avg": false,
+        "current": true,
+        "hideZero": false,
+        "max": false,
+        "min": false,
+        "rightSide": true,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "nstat_time_squeeze{host=~\"$host\"}",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{host}} @{{cpu}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Net RX action@$host per CPU",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "decimals": 0,
+          "format": "none",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "decimals": 0,
+          "format": "none",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "aliasColors": {},
+      "bars": false,
+      "dashLength": 10,
+      "dashes": false,
+      "datasource": null,
+      "fill": 1,
+      "gridPos": {
+        "h": 7,
+        "w": 12,
+        "x": 12,
+        "y": 15
+      },
+      "id": 45,
+      "legend": {
+        "alignAsTable": true,
+        "avg": false,
+        "current": true,
+        "max": false,
+        "min": false,
+        "rightSide": true,
+        "show": true,
+        "total": false,
+        "values": true
+      },
+      "lines": true,
+      "linewidth": 1,
+      "links": [],
+      "nullPointMode": "null",
+      "percentage": false,
+      "pointradius": 5,
+      "points": false,
+      "renderer": "flot",
+      "seriesOverrides": [],
+      "spaceLength": 10,
+      "stack": false,
+      "steppedLine": false,
+      "targets": [
+        {
+          "expr": "nstat_packet_drop{host=~\"$host\"}",
+          "format": "time_series",
+          "intervalFactor": 2,
+          "legendFormat": "{{host}} @{{cpu}}",
+          "refId": "A"
+        }
+      ],
+      "thresholds": [],
+      "timeFrom": null,
+      "timeShift": null,
+      "title": "Dropped packets@$host per CPU",
+      "tooltip": {
+        "shared": true,
+        "sort": 0,
+        "value_type": "individual"
+      },
+      "type": "graph",
+      "xaxis": {
+        "buckets": null,
+        "mode": "time",
+        "name": null,
+        "show": true,
+        "values": []
+      },
+      "yaxes": [
+        {
+          "decimals": 0,
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": "0",
+          "show": true
+        },
+        {
+          "decimals": 0,
+          "format": "short",
+          "label": null,
+          "logBase": 1,
+          "max": null,
+          "min": null,
+          "show": true
+        }
+      ],
+      "yaxis": {
+        "align": false,
+        "alignLevel": null
+      }
+    },
+    {
+      "columns": [],
+      "datasource": null,
+      "fontSize": "100%",
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 0,
+        "y": 22
+      },
+      "hideTimeOverride": false,
+      "id": 51,
+      "links": [],
+      "pageSize": null,
+      "scroll": true,
+      "showHeader": true,
+      "sort": {
+        "col": 2,
+        "desc": true
+      },
+      "styles": [
+        {
+          "alias": "Time",
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "pattern": "Time",
+          "type": "hidden"
+        },
+        {
+          "alias": "Increase since 1h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 0,
+          "mappingType": 1,
+          "pattern": "Value #A",
+          "preserveFormat": false,
+          "thresholds": [
+            "2",
+            "4"
+          ],
+          "type": "string",
+          "unit": "none",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "job",
+          "thresholds": [],
+          "type": "hidden",
+          "unit": "short"
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "instance",
+          "thresholds": [],
+          "type": "hidden",
+          "unit": "short"
+        },
+        {
+          "alias": "Increase since 4h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "Value #B",
+          "thresholds": [
+            "8",
+            "16"
+          ],
+          "type": "string",
+          "unit": "short",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "Increase since 24h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "Value #C",
+          "thresholds": [
+            "50",
+            "100"
+          ],
+          "type": "string",
+          "unit": "short",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "decimals": 2,
+          "pattern": "/.*/",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        }
+      ],
+      "targets": [
+        {
+          "expr": "floor(increase(nstat_time_squeeze{host=~\"$host\"}[1h])) > 0 or increase(nstat_time_squeeze{host=~\"$host\"}[1h]) - 1",
+          "format": "table",
+          "instant": false,
+          "intervalFactor": 2,
+          "refId": "A"
+        },
+        {
+          "expr": "floor(increase(nstat_time_squeeze{host=~\"$host\"}[4h])) > 0 or increase(nstat_time_squeeze{host=~\"$host\"}[4h]) - 1",
+          "format": "table",
+          "intervalFactor": 1,
+          "refId": "B"
+        },
+        {
+          "expr": "floor(increase(nstat_time_squeeze{host=~\"$host\"}[24h])) > 0 or increase(nstat_time_squeeze{host=~\"$host\"}[24h]) - 1",
+          "format": "table",
+          "intervalFactor": 1,
+          "refId": "C"
+        }
+      ],
+      "timeFrom": "1s",
+      "title": "Net RX action@$host per CPU - increased",
+      "transform": "table",
+      "transparent": false,
+      "type": "table"
+    },
+    {
+      "columns": [],
+      "datasource": null,
+      "fontSize": "100%",
+      "gridPos": {
+        "h": 6,
+        "w": 12,
+        "x": 12,
+        "y": 22
+      },
+      "hideTimeOverride": false,
+      "id": 47,
+      "links": [],
+      "pageSize": null,
+      "scroll": true,
+      "showHeader": true,
+      "sort": {
+        "col": 2,
+        "desc": true
+      },
+      "styles": [
+        {
+          "alias": "Time",
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "pattern": "Time",
+          "type": "hidden"
+        },
+        {
+          "alias": "Increase since 1h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 0,
+          "link": false,
+          "mappingType": 1,
+          "pattern": "Value #A",
+          "preserveFormat": false,
+          "rangeMaps": [
+            {
+              "from": "0",
+              "text": "asas",
+              "to": "0"
+            }
+          ],
+          "thresholds": [
+            "2",
+            "4"
+          ],
+          "type": "string",
+          "unit": "none",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "job",
+          "thresholds": [],
+          "type": "hidden",
+          "unit": "short"
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 2,
+          "mappingType": 1,
+          "pattern": "instance",
+          "thresholds": [],
+          "type": "hidden",
+          "unit": "short"
+        },
+        {
+          "alias": "Increase since 4h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 0,
+          "mappingType": 1,
+          "pattern": "Value #B",
+          "thresholds": [
+            "8",
+            "16"
+          ],
+          "type": "string",
+          "unit": "none",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "Increase since 24h",
+          "colorMode": "cell",
+          "colors": [
+            "transparent",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(245, 54, 54, 0.9)"
+          ],
+          "dateFormat": "YYYY-MM-DD HH:mm:ss",
+          "decimals": 0,
+          "mappingType": 1,
+          "pattern": "Value #C",
+          "thresholds": [
+            "50",
+            "100"
+          ],
+          "type": "string",
+          "unit": "short",
+          "valueMaps": [
+            {
+              "text": "-",
+              "value": "-1"
+            }
+          ]
+        },
+        {
+          "alias": "",
+          "colorMode": null,
+          "colors": [
+            "rgba(245, 54, 54, 0.9)",
+            "rgba(237, 129, 40, 0.89)",
+            "rgba(50, 172, 45, 0.97)"
+          ],
+          "decimals": 2,
+          "pattern": "/.*/",
+          "thresholds": [],
+          "type": "number",
+          "unit": "short"
+        }
+      ],
+      "targets": [
+        {
+          "expr": "floor(increase(nstat_packet_drop{host=~\"$host\"}[1h])) > 0 or increase(nstat_packet_drop{host=~\"$host\"}[1h]) - 1",
+          "format": "table",
+          "hide": false,
+          "instant": false,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "A"
+        },
+        {
+          "expr": "floor(increase(nstat_packet_drop{host=~\"$host\"}[4h])) > 0 or increase(nstat_packet_drop{host=~\"$host\"}[4h]) -1",
+          "format": "table",
+          "hide": false,
+          "instant": false,
+          "interval": "",
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "B"
+        },
+        {
+          "expr": "floor(increase(nstat_packet_drop{host=~\"$host\"}[24h])) > 0 or increase(nstat_packet_drop{host=~\"$host\"}[24h]) -1",
+          "format": "table",
+          "hide": false,
+          "instant": false,
+          "intervalFactor": 2,
+          "legendFormat": "",
+          "refId": "C"
+        }
+      ],
+      "timeFrom": "1s",
+      "title": "Dropped packets@$host per CPU - increased",
+      "transform": "table",
+      "transparent": false,
+      "type": "table"
+    },
+    {
       "collapsed": false,
       "gridPos": {
         "h": 1,
         "w": 24,
         "x": 0,
-        "y": 15
+        "y": 28
       },
       "id": 24,
       "panels": [],
@@ -454,7 +1003,7 @@
         "h": 7,
         "w": 8,
         "x": 0,
-        "y": 16
+        "y": 29
       },
       "id": 8,
       "legend": {
@@ -568,7 +1117,7 @@
         "h": 7,
         "w": 8,
         "x": 8,
-        "y": 16
+        "y": 29
       },
       "id": 15,
       "legend": {
@@ -681,7 +1230,7 @@
         "h": 7,
         "w": 8,
         "x": 16,
-        "y": 16
+        "y": 29
       },
       "id": 13,
       "legend": {
@@ -841,6 +1390,7 @@
           ]
         },
         "datasource": "prometheus",
+
         "hide": 0,
         "includeAll": true,
         "label": null,
@@ -933,6 +1483,6 @@
   "timezone": "browser",
   "title": "System - Networking",
   "uid": null,
-  "version": 22
+  "version": 1
 }
 {% endraw %}
\ No newline at end of file
diff --git a/linux/map.jinja b/linux/map.jinja
index f176068..c333a89 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -348,5 +348,13 @@
         'failed_auths_threshold': {
             'warn': 5,
         },
+        'net_rx_action_per_cpu_threshold': {
+            'warning': 0,
+            'minor': 100,
+        },
+        'packets_dropped_per_cpu_threshold': {
+            'minor': 0,
+            'major': 100,
+        },
     },
 }, grain='os_family', merge=salt['pillar.get']('linux:monitoring')) %}
diff --git a/linux/meta/prometheus.yml b/linux/meta/prometheus.yml
index ca4ba3d..3ca2b26 100644
--- a/linux/meta/prometheus.yml
+++ b/linux/meta/prometheus.yml
@@ -208,6 +208,50 @@
       annotations:
         summary: "{{ threshold }}{%- raw %} failed SSH logins"
         description: "{{ $value }} failed SSH login attempts on the {{ $labels.host }} node during the last 5 minutes."
+    PacketsDroppedByCpuMinor:  # per-CPU packet drops over 24h above the minor threshold
+      {%- endraw %}
+      {%- set packets_dropped_minor_threshold = monitoring.packets_dropped_per_cpu_threshold.minor %}
+      if: >-
+        floor(increase(nstat_packet_drop[24h])) > {{ packets_dropped_minor_threshold }}
+      labels:
+        severity: minor
+        service: system
+      annotations:
+        summary: "CPU dropped more than {{ packets_dropped_minor_threshold }}{%- raw %} packets"
+        description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 24 hours."
+    PacketsDroppedByCpuMajor:  # per-CPU packet drops over 24h above the major threshold
+      {%- endraw %}
+      {%- set packets_dropped_major_threshold = monitoring.packets_dropped_per_cpu_threshold.major %}
+      if: >-
+        floor(increase(nstat_packet_drop[24h])) > {{ packets_dropped_major_threshold }}
+      labels:
+        severity: major
+        service: system
+      annotations:
+        summary: "CPU dropped more than {{ packets_dropped_major_threshold }}{%- raw %} packets"
+        description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 24 hours."
+    NetRxActionByCpuWarning:  # per-CPU time-squeeze count over 24h above the warning threshold
+      {%- endraw %}
+      {%- set net_rx_action_warning_threshold = monitoring.net_rx_action_per_cpu_threshold.warning %}
+      if: >-
+        floor(increase(nstat_time_squeeze[24h])) > {{ net_rx_action_warning_threshold }}
+      labels:
+        severity: warning
+        service: system
+      annotations:
+        summary: "CPU terminated more than {{ net_rx_action_warning_threshold }}{%- raw %} net_rx_action loops"
+        description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours."
+    NetRxActionByCpuMinor:  # per-CPU time-squeeze count over 24h above the minor threshold
+      {%- endraw %}
+      {%- set net_rx_action_minor_threshold = monitoring.net_rx_action_per_cpu_threshold.minor %}
+      if: >-
+        floor(increase(nstat_time_squeeze[24h])) > {{ net_rx_action_minor_threshold }}
+      labels:
+        severity: minor
+        service: system
+      annotations:
+        summary: "CPU terminated more than {{ net_rx_action_minor_threshold }}{%- raw %} net_rx_action loops"
+        description: "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node terminated {{ $value }} net_rx_action loops during the last 24 hours."
 {%- endraw %}
 {%- if monitoring.bond_status.interfaces is defined and monitoring.bond_status.interfaces %}
 {%- raw %}
diff --git a/linux/meta/telegraf.yml b/linux/meta/telegraf.yml
index 0c39da1..d1cd721 100644
--- a/linux/meta/telegraf.yml
+++ b/linux/meta/telegraf.yml
@@ -20,6 +20,10 @@
     kernel:
     net:
     mem:
+    nstat:
+      fieldpass:
+        - packet_drop
+        - time_squeeze
     processes:
     swap:
     system: