blob: 1cb356f0ee18e810bf966472844fef4cc41c3dd0 [file] [log] [blame]
{
"return": [
{
"cmp01.fakedomain.local": {
"biosversion": "1.10.2-1.1~u16.04+mcp2",
"kernel": "Linux",
"domain": "ozhurba-os-oc-cicd-sl.local",
"uid": 0,
"zmqversion": "4.1.4",
"kernelrelease": "4.15.0-43-generic",
"pythonpath": [
"/usr/bin",
"/usr/lib/python2.7",
"/usr/lib/python2.7/plat-x86_64-linux-gnu",
"/usr/lib/python2.7/lib-tk",
"/usr/lib/python2.7/lib-old",
"/usr/lib/python2.7/lib-dynload",
"/usr/local/lib/python2.7/dist-packages",
"/usr/lib/python2.7/dist-packages"
],
"serialnumber": "c7b67a52-b1a5-4d50-9069-599cc0aad878",
"pid": 2253,
"telegraf": {
"remote_agent": {
"input": {},
"processor": {},
"dir": {
"config": "/srv/volumes/local/telegraf",
"config_d": "/srv/volumes/local/telegraf/telegraf.d"
},
"output": {}
},
"agent": {
"metric_batch_size": 1000,
"collection_jitter": 2,
"interval": 15,
"enabled": true,
"pkgs": [
"telegraf"
],
"round_interval": false,
"output": {
"prometheus_client": {
"engine": "prometheus",
"bind": {
"port": 9126,
"address": "0.0.0.0"
},
"string_as_label": false
}
},
"input": {
"kernel": null,
"processes": null,
"nstat": {
"fieldpass": [
"packet_drop",
"time_squeeze"
]
},
"swap": null,
"mem": null,
"ntp": {
"template": "ntp/files/telegraf.conf"
},
"system": null,
"http_listener": {
"read_timeout": "10s",
"bind": {
"port": 8186,
"address": "127.0.0.1"
},
"tagexclude": [
"hostname"
],
"write_timeout": "10s"
},
"linux_sysctl_fs": null,
"diskio": null,
"procstat": {
"process": {
"contrail-vrouter-agent": {
"pattern": "contrail-vrouter-agent"
},
"sshd": {
"exe": "sshd"
},
"salt-minion": {
"pattern": "salt-minion"
},
"cron": {
"exe": "cron"
},
"contrail-nodemgr-vrouter": {
"pattern": "python.*contrail-nodemgr.*-vrouter"
},
"ntpd": {
"exe": "ntpd"
}
}
},
"net": null,
"disk": {
"ignore_fs": [
"aufs",
"rootfs",
"sysfs",
"proc",
"devtmpfs",
"devpts",
"tmpfs",
"fusectl",
"cgroup",
"overlay"
]
},
"cpu": {
"totalcpu": true,
"percpu": false
},
"http_response": {
"contrail-node-manager": {
"address": "http://127.0.0.1:8102/"
},
"contrail-vrouter": {
"address": "http://127.0.0.1:8085/"
}
}
},
"metric_buffer_limit": 10000,
"processor": {},
"dir": {
"config": "/etc/telegraf",
"config_d": "/etc/telegraf/telegraf.d"
}
}
},
"ip_interfaces": {
"vethec13d320-3": [
"fe80::c87c:a9ff:fe17:a601"
],
"veth880f8ede-a": [
"fe80::f0b6:bbff:fe72:d0cc"
],
"veth256f51fd-c": [
"fe80::688f:17ff:fe12:c526"
],
"veth18c9ae6b-9": [
"fe80::c2c:88ff:fe42:b3c8"
],
"veth0ee2fae3-b": [
"fe80::70a6:c4ff:fef3:6955"
],
"ens4": [
"10.10.100.5",
"fe80::f816:3eff:feee:9510"
],
"ens5": [
"fe80::f816:3eff:fe6a:ff78"
],
"ens6": [
"10.13.100.26",
"fe80::f816:3eff:fe3c:67ee"
],
"ens3": [
"10.11.1.1",
"fe80::f816:3eff:fed0:8639"
],
"veth7739e3f4-f": [
"fe80::a8f8:2eff:fe83:6dc5"
],
"lo": [
"127.0.0.1",
"::1"
],
"vethccc56015-8": [
"fe80::ccbf:97ff:fe1e:c4b4"
],
"vethc03db88c-1": [
"fe80::7b:e3ff:fea7:a8ff"
],
"veth67e6d989-e": [
"fe80::6c3f:19ff:feaf:edba"
],
"vhost0": [
"10.12.1.1",
"fe80::f816:3eff:fe6a:ff78"
],
"veth8342151a-7": [
"fe80::1c3e:f3ff:fed5:5872"
],
"vethd53dd8ab-d": [
"fe80::e4e8:42ff:fec2:b2ed"
],
"veth108c39b4-b": [
"fe80::784c:7cff:fee8:d005"
],
"pkt0": [
"fe80::c52:1bff:feec:2a07"
],
"veth625ca23d-8": [
"fe80::1c9d:65ff:fe06:979c"
],
"vethcabb806e-4": [
"fe80::5c08:5aff:fe1b:2d3a"
],
"vethc1f26111-e": [
"fe80::bc15:baff:fed1:b96e"
],
"pkt3": [],
"pkt2": [],
"pkt1": [],
"veth783896a6-1": [
"fe80::ec0d:87ff:fefb:9241"
]
},
"groupname": "root",
"fqdn_ip6": [],
"mem_total": 7976,
"saltversioninfo": [
2017,
7,
8,
0
],
"SSDs": [],
"mdadm": [],
"id": "cmp1.ozhurba-os-oc-cicd-sl.local",
"manufacturer": "OpenStack Foundation",
"osrelease": "16.04",
"ps": "ps -efHww",
"systemd": {
"version": "229",
"features": "+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ -LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN"
},
"fqdn": "cmp1.ozhurba-os-oc-cicd-sl.local",
"uuid": "62cb8077-9932-42e2-95fc-27f611de39f6",
"ip6_interfaces": {
"vethec13d320-3": [
"fe80::c87c:a9ff:fe17:a601"
],
"veth880f8ede-a": [
"fe80::f0b6:bbff:fe72:d0cc"
],
"veth256f51fd-c": [
"fe80::688f:17ff:fe12:c526"
],
"veth18c9ae6b-9": [
"fe80::c2c:88ff:fe42:b3c8"
],
"veth0ee2fae3-b": [
"fe80::70a6:c4ff:fef3:6955"
],
"ens4": [
"fe80::f816:3eff:feee:9510"
],
"ens5": [
"fe80::f816:3eff:fe6a:ff78"
],
"ens6": [
"fe80::f816:3eff:fe3c:67ee"
],
"ens3": [
"fe80::f816:3eff:fed0:8639"
],
"veth7739e3f4-f": [
"fe80::a8f8:2eff:fe83:6dc5"
],
"lo": [
"::1"
],
"vethccc56015-8": [
"fe80::ccbf:97ff:fe1e:c4b4"
],
"vethc03db88c-1": [
"fe80::7b:e3ff:fea7:a8ff"
],
"veth67e6d989-e": [
"fe80::6c3f:19ff:feaf:edba"
],
"vhost0": [
"fe80::f816:3eff:fe6a:ff78"
],
"veth8342151a-7": [
"fe80::1c3e:f3ff:fed5:5872"
],
"vethd53dd8ab-d": [
"fe80::e4e8:42ff:fec2:b2ed"
],
"veth108c39b4-b": [
"fe80::784c:7cff:fee8:d005"
],
"pkt0": [
"fe80::c52:1bff:feec:2a07"
],
"veth625ca23d-8": [
"fe80::1c9d:65ff:fe06:979c"
],
"vethcabb806e-4": [
"fe80::5c08:5aff:fe1b:2d3a"
],
"vethc1f26111-e": [
"fe80::bc15:baff:fed1:b96e"
],
"pkt3": [],
"pkt2": [],
"pkt1": [],
"veth783896a6-1": [
"fe80::ec0d:87ff:fefb:9241"
]
},
"num_cpus": 4,
"hwaddr_interfaces": {
"vethec13d320-3": "ca:7c:a9:17:a6:01",
"veth880f8ede-a": "f2:b6:bb:72:d0:cc",
"veth256f51fd-c": "6a:8f:17:12:c5:26",
"veth18c9ae6b-9": "0e:2c:88:42:b3:c8",
"veth0ee2fae3-b": "72:a6:c4:f3:69:55",
"ens4": "fa:16:3e:ee:95:10",
"ens5": "fa:16:3e:6a:ff:78",
"ens6": "fa:16:3e:3c:67:ee",
"ens3": "fa:16:3e:d0:86:39",
"veth7739e3f4-f": "aa:f8:2e:83:6d:c5",
"lo": "00:00:00:00:00:00",
"vethccc56015-8": "ce:bf:97:1e:c4:b4",
"vethc03db88c-1": "02:7b:e3:a7:a8:ff",
"veth67e6d989-e": "6e:3f:19:af:ed:ba",
"vhost0": "fa:16:3e:6a:ff:78",
"veth8342151a-7": "1e:3e:f3:d5:58:72",
"vethd53dd8ab-d": "e6:e8:42:c2:b2:ed",
"veth108c39b4-b": "7a:4c:7c:e8:d0:05",
"pkt0": "0e:52:1b:ec:2a:07",
"veth625ca23d-8": "1e:9d:65:06:97:9c",
"vethcabb806e-4": "5e:08:5a:1b:2d:3a",
"vethc1f26111-e": "be:15:ba:d1:b9:6e",
"pkt3": "62:2a:6b:43:22:23",
"pkt2": "9a:6f:3c:e5:f1:68",
"pkt1": "b6:fa:51:a7:bf:3c",
"veth783896a6-1": "ee:0d:87:fb:92:41"
},
"init": "systemd",
"ip4_interfaces": {
"vethec13d320-3": [],
"veth880f8ede-a": [],
"veth256f51fd-c": [],
"veth18c9ae6b-9": [],
"veth0ee2fae3-b": [],
"ens4": [
"10.10.100.5"
],
"ens5": [],
"ens6": [
"10.13.100.26"
],
"ens3": [
"10.11.1.1"
],
"veth7739e3f4-f": [],
"lo": [
"127.0.0.1"
],
"vethccc56015-8": [],
"vethc03db88c-1": [],
"veth67e6d989-e": [],
"vhost0": [
"10.12.1.1"
],
"veth8342151a-7": [],
"vethd53dd8ab-d": [],
"veth108c39b4-b": [],
"pkt0": [],
"veth625ca23d-8": [],
"vethcabb806e-4": [],
"vethc1f26111-e": [],
"pkt3": [],
"pkt2": [],
"pkt1": [],
"veth783896a6-1": []
},
"grafana": {
"dashboard": {
"openstack_tenants_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/openstack_tenants_prometheus.json",
"format": "json"
},
"contrail_controller_prometheus": {
"datasource": "prometheus",
"template": "opencontrail/files/grafana_dashboards/contrail_4_controller_prometheus.json",
"format": "json"
},
"cassandra_influxdb": {
"datasource": "influxdb",
"template": "opencontrail/files/grafana_dashboards/cassandra_influxdb.json",
"format": "json"
},
"ntp_prometheus": {
"datasource": "prometheus",
"template": "ntp/files/grafana_dashboards/ntp_prometheus.json",
"format": "json"
},
"cinder_prometheus": {
"datasource": "prometheus",
"template": "cinder/files/grafana_dashboards/cinder_prometheus_fluentd.json",
"format": "json"
},
"nova_utilization_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_utilization_prometheus.json",
"format": "json"
},
"hypervisor_influxdb": {
"datasource": "influxdb",
"template": "nova/files/grafana_dashboards/hypervisor_influxdb.json",
"format": "json"
},
"contrail_vrouter_prometheus": {
"datasource": "prometheus",
"template": "opencontrail/files/grafana_dashboards/contrail_4_vrouter_prometheus.json",
"format": "json"
},
"cinder_influxdb": {
"datasource": "influxdb",
"template": "cinder/files/grafana_dashboards/cinder_influxdb.json",
"format": "json"
},
"zookeeper_prometheus": {
"datasource": "prometheus",
"template": "opencontrail/files/grafana_dashboards/zookeeper_prometheus.json",
"format": "json"
},
"openstack_overview_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/openstack_overview_prometheus.json",
"format": "json"
},
"linux_network_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_network_prometheus.json",
"format": "json"
},
"service_level": {
"datasource": "influxdb",
"row": {
"cinder-service-level": {
"title": "Cinder Service Levels",
"panel": {
"cinder-api-requests": {
"target": {
"cinder-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
},
"cinder-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
},
"cinder-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
}
},
"title": "Cinder API Requests"
},
"cinder-control-uptime": {
"target": {
"cinder-control-degraded-unknwon-status": {
"alias": "Degraded or Unknown",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"cinder-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND value = 0 AND $timeFilter"
},
"cinder-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND value = 4 AND $timeFilter"
}
},
"title": "Cinder Control Uptime"
},
"cinder-data-uptime": {
"target": {
"cinder-data-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND value = 0 AND $timeFilter"
},
"cinder-data-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND value = 4 AND $timeFilter"
},
"cinder-data-degraded-unknwon-status": {
"alias": "Degraded or Unknown",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
}
},
"title": "Cinder Data Uptime"
},
"cinder-api-availability": {
"target": {
"cinder-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'cinder-api' OR service = 'cinder-v2-api') AND value = 1 AND $timeFilter"
},
"cinder-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'cinder-api' OR service = 'cinder-v2-api') AND value = 0 AND $timeFilter"
}
},
"title": "Cinder API Availability (V1 & V2)"
}
}
},
"nova-service-level": {
"title": "Nova Service Levels",
"panel": {
"nova-control-uptime": {
"target": {
"nova-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND value = 0 AND $timeFilter"
},
"nova-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND value = 4 AND $timeFilter"
},
"nova-control-degraded-unknwon-status": {
"alias": "Degraded or Unknown",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
}
},
"title": "Nova Control Uptime"
},
"nova-data-uptime": {
"target": {
"nova-data-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND value = 0 AND $timeFilter"
},
"nova-data-degraded-unknwon-status": {
"alias": "Degraded or Unknown",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"nova-data-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND value = 4 AND $timeFilter"
}
},
"title": "Nova Data Uptime"
},
"nova-api-availability": {
"target": {
"nova-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'nova-api' AND value = 0 AND $timeFilter"
},
"nova-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'nova-api' AND value = 1 AND $timeFilter"
}
},
"title": "Nova API Availability"
},
"nova-api-requests": {
"target": {
"nova-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
},
"nova-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
},
"nova-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
}
},
"title": "Nova API Requests"
}
}
}
}
},
"linux_influxdb": {
"datasource": "influxdb",
"template": "linux/files/grafana_dashboards/system_influxdb.json",
"format": "json"
},
"nova_instances_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_instances_prometheus.json",
"format": "json"
},
"nova_influxdb": {
"datasource": "influxdb",
"template": "nova/files/grafana_dashboards/nova_influxdb.json",
"format": "json"
},
"main_prometheus": {
"datasource": "prometheus",
"row": {
"ost-control-plane": {
"title": "OpenStack Control Plane",
"panel": {
"cinder": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"cinder.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"nova": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=\"nova\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
}
}
}
}
},
"contrail_influxdb": {
"datasource": "influxdb",
"template": "opencontrail/files/grafana_dashboards/contrail_influxdb.json",
"format": "json"
},
"linux_disk_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_disk_prometheus.json",
"format": "json"
},
"cassandra_prometheus": {
"datasource": "prometheus",
"template": "opencontrail/files/grafana_dashboards/cassandra_prometheus.json",
"format": "json"
},
"zookeeper_influxdb": {
"datasource": "influxdb",
"template": "opencontrail/files/grafana_dashboards/zookeeper_influxdb.json",
"format": "json"
},
"nova_hypervisor_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_hypervisor_prometheus.json",
"format": "json"
},
"main_influxdb": {
"datasource": "influxdb",
"row": {
"ost-data-plane": {
"title": "OpenStack Data Plane",
"panel": {
"cinder": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'cinder-data' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"nova": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'nova-data' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
}
}
},
"ost-control-plane": {
"title": "OpenStack Control Plane",
"panel": {
"cinder": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'cinder-control' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"nova": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'nova-control' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
}
}
}
}
},
"nova_overview_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_overview_prometheus.json",
"format": "json"
},
"linux_overview_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_overview_prometheus.json",
"format": "json"
}
},
"parameters": null
},
"ssh_fingerprints": {
"rsa": "b2:cd:0e:6d:38:fa:37:de:b6:0f:89:81:85:72:77:a2",
"ecdsa": "22:8e:8d:63:f5:6b:3b:60:3e:20:fd:0c:68:22:2c:0c",
"dsa": "d2:03:00:39:78:2b:af:dc:81:18:b5:e0:d3:e1:f0:74"
},
"gid": 0,
"master": "10.10.0.15",
"ipv4": [
"10.10.100.5",
"10.11.1.1",
"10.12.1.1",
"10.13.100.26",
"127.0.0.1"
],
"dns": {
"domain": "",
"sortlist": [],
"nameservers": [
"172.18.176.6",
"172.17.44.91"
],
"ip4_nameservers": [
"172.18.176.6",
"172.17.44.91"
],
"search": [
"openstacklocal"
],
"ip6_nameservers": [],
"options": []
},
"ipv6": [
"::1",
"fe80::7b:e3ff:fea7:a8ff",
"fe80::c2c:88ff:fe42:b3c8",
"fe80::c52:1bff:feec:2a07",
"fe80::1c3e:f3ff:fed5:5872",
"fe80::1c9d:65ff:fe06:979c",
"fe80::5c08:5aff:fe1b:2d3a",
"fe80::688f:17ff:fe12:c526",
"fe80::6c3f:19ff:feaf:edba",
"fe80::70a6:c4ff:fef3:6955",
"fe80::784c:7cff:fee8:d005",
"fe80::a8f8:2eff:fe83:6dc5",
"fe80::bc15:baff:fed1:b96e",
"fe80::c87c:a9ff:fe17:a601",
"fe80::ccbf:97ff:fe1e:c4b4",
"fe80::e4e8:42ff:fec2:b2ed",
"fe80::ec0d:87ff:fefb:9241",
"fe80::f0b6:bbff:fe72:d0cc",
"fe80::f816:3eff:fe3c:67ee",
"fe80::f816:3eff:fe6a:ff78",
"fe80::f816:3eff:fed0:8639",
"fe80::f816:3eff:feee:9510"
],
"server_id": 1847579797,
"cpu_flags": [
"fpu",
"vme",
"de",
"pse",
"tsc",
"msr",
"pae",
"mce",
"cx8",
"apic",
"sep",
"mtrr",
"pge",
"mca",
"cmov",
"pat",
"pse36",
"clflush",
"mmx",
"fxsr",
"sse",
"sse2",
"ss",
"syscall",
"nx",
"pdpe1gb",
"rdtscp",
"lm",
"constant_tsc",
"arch_perfmon",
"rep_good",
"nopl",
"xtopology",
"cpuid",
"pni",
"pclmulqdq",
"vmx",
"ssse3",
"fma",
"cx16",
"pcid",
"sse4_1",
"sse4_2",
"x2apic",
"movbe",
"popcnt",
"tsc_deadline_timer",
"aes",
"xsave",
"avx",
"f16c",
"rdrand",
"hypervisor",
"lahf_lm",
"abm",
"3dnowprefetch",
"cpuid_fault",
"invpcid_single",
"pti",
"ssbd",
"ibrs",
"ibpb",
"tpr_shadow",
"vnmi",
"flexpriority",
"ept",
"vpid",
"fsgsbase",
"tsc_adjust",
"bmi1",
"hle",
"avx2",
"smep",
"bmi2",
"erms",
"invpcid",
"rtm",
"rdseed",
"adx",
"smap",
"xsaveopt",
"arat"
],
"osfullname": "Ubuntu",
"localhost": "cmp1",
"lsb_distrib_id": "Ubuntu",
"username": "root",
"fqdn_ip4": [
"10.11.1.1"
],
"shell": "/bin/sh",
"nodename": "cmp1",
"saltversion": "2017.7.8",
"lsb_distrib_release": "16.04",
"saltpath": "/usr/lib/python2.7/dist-packages/salt",
"pythonversion": [
2,
7,
12,
"final",
0
],
"host": "cmp1",
"os_family": "Debian",
"oscodename": "xenial",
"services": [
"fluentd",
"telegraf",
"rsyslog",
"openssh",
"ntp",
"opencontrail",
"nova",
"grafana",
"prometheus",
"logrotate",
"_reclass_",
"linux",
"cinder",
"salt",
"openscap"
],
"osfinger": "Ubuntu-16.04",
"biosreleasedate": "04/01/2014",
"dns_records": [
{
"names": [
"cmp1.ozhurba-os-oc-cicd-sl.local",
"cmp1"
],
"address": "10.11.1.1"
}
],
"lsb_distrib_description": "Ubuntu 16.04.5 LTS",
"sphinx": {
"doc": {
"cinder": {
"role": {
"volume": {
"name": "volume",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack@10.11.0.10:5672/openstack"
},
"version": {
"name": "Version",
"value": "queens"
},
"backends": {
"value": "* lvm-driver:\n * storage engine: lvm\n * volume type: lvm-driver"
},
"identity_host": {
"name": "Identity service",
"value": "cinder@10.11.0.10:35357"
},
"packages": {
"value": "* cinder-volume: 2:12.0.4-2~u16.04+mcp96\n* lvm2: 2.02.133-1ubuntu10\n* sysfsutils: 2.1.0+repack-4\n* sg3-utils: 1.40-0ubuntu1\n* python-cinder: 2:12.0.4-2~u16.04+mcp96\n* python-mysqldb: 1.3.7-1build2\n* p7zip: 9.20.1~dfsg.1-4.2\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-pycadf: 2.6.0-1~u16.04+mcp2\n"
},
"database_host": {
"name": "Database",
"value": "cinder@10.11.0.50:3306//cinder"
}
}
}
},
"description": "Cinder provides an infrastructure for managing volumes in OpenStack. It was originally a Nova component called nova-volume, but has become an independent project since the Folsom release.",
"name": "Cinder"
},
"opencontrail": {
"role": {
"vrouter": {
"name": "vrouter",
"param": {
"disable_flow_collection": {
"name": "Disable flow collection",
"value": true
},
"compute_interface": {
"name": "vrouter vhost0 interface",
"value": "* interface binded: ens5\n* ip address: 10.12.1.1\n* MTU: 9000\n"
},
"packages": {
"value": "* contrail-utils: 4.0~20190123144438-0\n* iproute2: 4.3.0-1ubuntu3.16.04.4\n* haproxy: 1.6.3-1ubuntu0.1\n* linux-headers-4.15.0-43-generic: 4.15.0-43.46~16.04.1\n* contrail-nova-driver: 4.0~20190123144438-0"
},
"version": {
"name": "Contrail version",
"value": 4.0
}
}
}
},
"description": "OpenContrail is an open source network virtualization platform for the cloud.",
"name": "OpenContrail"
},
"salt": {
"role": {
"minion": {
"name": "minion",
"param": {
"version": {
"value": "2017.7.8 (Nitrogen)"
}
}
}
},
"description": "Salt is a new approach to infrastructure management. Easy enough to get running in minutes, scalable enough to manage tens of thousands of servers, and fast enough to communicate with them in seconds.",
"name": "Salt"
},
"nova": {
"role": {
"compute": {
"name": "compute",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"network_host": {
"name": "Network service",
"value": "10.11.0.10:9696"
},
"vncproxy_url": {
"name": "VNC proxy URL",
"value": "https://10.13.250.9:6080"
},
"reserved_host_memory_mb": {
"name": "Reserved Host Memory",
"value": 1100
},
"glance_host": {
"name": "Image service",
"value": "10.11.0.10:9292"
},
"version": {
"name": "Version",
"value": "queens"
},
"identity_host": {
"name": "Identity host ip",
"value": "nova@10.11.0.10:35357"
},
"packages": {
"value": "* nova-common: 2:17.0.9-6~u16.01+mcp93\n* nova-compute-kvm: 2:17.0.9-6~u16.01+mcp93\n* python-novaclient: 2:9.1.1-1~u16.04+mcp6\n* pm-utils: 1.4.1-16\n* sysfsutils: 2.1.0+repack-4\n* sg3-utils: 1.40-0ubuntu1\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-guestfs: 1:1.32.2-4ubuntu2\n* gettext-base: 0.19.7-2ubuntu3.1"
},
"database_host": {
"name": "Database",
"value": "nova@10.11.0.50:3306/nova"
}
}
}
},
"description": "OpenStack Nova provides a cloud computing fabric controller, supporting a wide variety of virtualization technologies, including KVM, Xen, LXC, VMware, and more.",
"name": "Nova"
},
"linux": {
"role": {
"network": {
"name": "Network",
"param": {
"ip": {
"name": "IP Addresses",
"value": [
"10.10.100.5",
"10.11.1.1",
"10.12.1.1",
"10.13.100.26",
"127.0.0.1"
]
},
"fqdn": {
"name": "FQDN",
"value": "cmp1.ozhurba-os-oc-cicd-sl.local"
}
}
},
"system": {
"name": "System",
"param": {
"kernel": {
"value": "Linux 4.15.0-43-generic"
},
"distribution": {
"value": "Ubuntu 16.04.5 LTS"
},
"name": {
"value": "cmp1"
}
}
}
},
"description": "Linux is a high performance, yet completely free, Unix-like operating system that is suitable for use on a wide range of computers and other products.",
"name": "Linux"
}
}
},
"num_gpus": 1,
"roles": [
"fluentd.agent",
"telegraf.agent",
"rsyslog.client",
"openssh.client",
"openssh.server",
"ntp.client",
"opencontrail.client",
"opencontrail.compute",
"nova.compute",
"grafana.collector",
"prometheus.collector",
"logrotate.server",
"linux.storage",
"linux.system",
"linux.network",
"cinder.volume",
"salt.minion",
"openscap.service"
],
"virtual": "kvm",
"os": "Ubuntu",
"disks": [
"loop1",
"dm-1",
"loop6",
"dm-6",
"vdb",
"loop4",
"dm-4",
"loop2",
"dm-2",
"loop0",
"dm-0",
"loop7",
"loop5",
"dm-5",
"vda",
"loop3",
"dm-3"
],
"cpu_model": "Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz",
"osmajorrelease": 16,
"pythonexecutable": "/usr/bin/python",
"productname": "OpenStack Nova",
"osarch": "amd64",
"cpuarch": "x86_64",
"lsb_distrib_codename": "xenial",
"osrelease_info": [
16,
4
],
"locale_info": {
"detectedencoding": "UTF-8",
"defaultlanguage": "en_US",
"defaultencoding": "UTF-8"
},
"gpus": [
{
"model": "GD 5446",
"vendor": "unknown"
}
],
"prometheus": {
"exporters": {
"libvirt": {
"services": {
"qemu": {
"bind": {
"port": 9177,
"address": "0.0.0.0"
},
"enabled": true
}
},
"packages": [
"libvirt-exporter"
],
"enabled": true
}
},
"server": {
"recording": {},
"target": {
"static": {
"fluentd": {
"relabel_configs": [
{
"regex": "10.11.1.1:24231",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cmp1"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 24231,
"address": "10.11.1.1"
}
]
},
"telegraf": {
"relabel_configs": [
{
"regex": "10.11.1.1:9126",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cmp1"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 9126,
"address": "10.11.1.1"
}
]
},
"libvirt_qemu_exporter": {
"metric_relabel": [
{
"regex": "10.11.1.1:9177",
"source_labels": "instance",
"target_label": "host",
"replacement": "cmp1"
}
],
"relabel_configs": [
{
"regex": "10.11.1.1:9177",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cmp1"
}
],
"endpoint": [
{
"port": 9177,
"address": "10.11.1.1"
}
]
}
}
},
"alert": {
"ContrailApiDown": {
"labels": {
"severity": "minor",
"service": "contrail"
},
"annotations": {
"description": "The {{ $labels.name }} API endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "{{ $labels.name }} API endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"contrail.*\"} == 0"
},
"ContrailApiOutage": {
"labels": {
"severity": "critical",
"service": "contrail"
},
"annotations": {
"description": "The {{ $labels.name }} API is not accessible for all available endpoints for 2 minutes.",
"summary": "{{ $labels.name }} API outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"contrail.*\"} == 0) by (name) == count(http_response_status{name=~\"contrail.*\"}) by (name)"
},
"ContrailVrouterXMPPSessionsChangesTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The OpenContrail vRouter XMPP sessions on the {{ $labels.host }} node have changed {{ $value }} times.",
"summary": "OpenContrail vRouter XMPP sessions changes reached the limit of 5"
},
"if": "abs(delta(contrail_vrouter_xmpp[2m])) >= 5"
},
"CinderErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "cinder"
},
"annotations": {
"description": "The average per-second rate of errors in Cinder logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Cinder logs"
},
"if": "sum(rate(log_messages{service=\"cinder\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"ContrailApiDownMinor": {
"labels": {
"severity": "minor",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} {{ $labels.name }} API endpoints (>= 30.0%) are not accessible for 2 minutes.",
"summary": "30.0% of {{ $labels.name }} API endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"contrail.*\"} == 0) by (name) >= count(http_response_status{name=~\"contrail.*\"}) by (name) *0.3"
},
"ContrailVrouterXMPPSessionsTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter XMPP sessions are open on the {{ $labels.host }} node for 2 minutes.",
"summary": "OpenContrail vRouter XMPP sessions reached the limit of 10"
},
"for": "2m",
"if": "min(contrail_vrouter_xmpp) by (host) >= 10"
},
"LibvirtDown": {
"labels": {
"severity": "critical",
"service": "libvirt"
},
"annotations": {
"description": "The Libvirt metric exporter fails to gather metrics on the {{ $labels.host }} node for 2 minutes.",
"summary": "Failure to gather Libvirt metrics"
},
"for": "2m",
"if": "libvirt_up == 0"
},
"CronProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The cron process on the {{ $labels.host }} node is down.",
"summary": "Cron process is down"
},
"if": "procstat_running{process_name=\"cron\"} == 0"
},
"ContrailApiDownMajor": {
"labels": {
"severity": "major",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} {{ $labels.name }} API endpoints (>= 60.0%) are not accessible for 2 minutes.",
"summary": "60.0% of {{ $labels.name }} API endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"contrail.*\"} == 0) by (name) >= count(http_response_status{name=~\"contrail.*\"}) by (name) *0.6"
},
"ContrailBGPSessionsNoEstablished": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "There are no established OpenContrail BGP sessions on the {{ $labels.host }} node for 2 minutes.",
"summary": "No established OpenContrail BGP sessions"
},
"for": "2m",
"if": "max(contrail_bgp_session_count) by (host) == 0"
},
"ContrailXMPPSessionsChangesTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The OpenContrail XMPP sessions on the {{ $labels.host }} node have changed {{ $value }} times.",
"summary": "OpenContrail XMPP sessions changes reached the limit of 100"
},
"if": "abs(delta(contrail_xmpp_session_count[2m])) >= 100"
},
"SystemRxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets received by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "60 received packets were dropped"
},
"if": "increase(net_drop_in[1m]) > 60 unless on (host,interface) bond_slave_active == 0"
},
"ContrailFlowsLabelInvalidTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had an invalid composite interface for 2 minutes.",
"summary": "OpenContrail vRouter flows with an invalid label reached the limit of 100"
},
"for": "2m",
"if": "min(contrail_vrouter_flows_invalid_label) by (host) >= 100"
},
"ContrailVrouterLLSSessionsTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter LLS sessions are open on the {{ $labels.host }} node for 2 minutes.",
"summary": "OpenContrail vRouter LLS sessions reached the limit of 10"
},
"for": "2m",
"if": "min(contrail_vrouter_lls) by (host) >= 10"
},
"ContrailFlowsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"enabled": false,
"annotations": {
"description": "The average per-second rate of dropped OpenContrail vRouter flows on the {{ $labels.host }} node is {{ $value }} for 2 minutes.",
"summary": "OpenContrail vRouter dropped flows reached the limit of 0.2/s"
},
"for": "2m",
"if": "rate(contrail_vrouter_flows_flow_action_drop[5m]) >= 0.2"
},
"ContrailXMPPSessionsMissing": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail XMPP sessions are missing on the compute cluster for 2 minutes.",
"summary": "Missing OpenContrail XMPP sessions"
},
"for": "2m",
"if": "count(contrail_vrouter_xmpp) * 2 - sum(contrail_xmpp_session_count) > 0"
},
"ContrailProcessDownMajor": {
"labels": {
"severity": "major",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} {{ $labels.process_name }} processes (>= 60.0%) are down.",
"summary": "60.0% of {{ $labels.process_name }} processes are down"
},
"if": "count(procstat_running{process_name=~\"contrail.*\"} == 0) by (process_name) >= 0.6*count(procstat_running{process_name=~\"contrail.*\"}) by (process_name)"
},
"ContrailFlowsQueueSizeExceededTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The average per-second rate of OpenContrail vRouter flows exceeding the queue size on the {{ $labels.host }} node is {{ $value }} for 2 minutes.",
"summary": "OpenContrail vRouter flows exceeding the queue size reached the limit of 0.1/s"
},
"for": "2m",
"if": "rate(contrail_vrouter_flows_flow_queue_limit_exceeded[5m]) >= 0.1"
},
"SystemLoadTooHighWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is 1.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 1.0"
},
"PacketsDroppedByCpuMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 24 hours.",
"summary": "CPU dropped 100 packets"
},
"if": "floor(increase(nstat_packet_drop[24h])) > 100"
},
"SshdProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The SSH process on the {{ $labels.host }} node is down.",
"summary": "SSH process is down"
},
"if": "procstat_running{process_name=\"sshd\"} == 0"
},
"SystemSwapFullMinor": {
"labels": {
"severity": "minor",
"service": "system"
},
"annotations": {
"description": "The swap on the {{ $labels.host }} node is {{ $value }}% used for 2 minutes.",
"summary": "90.0% of swap is used"
},
"for": "2m",
"if": "swap_used_percent >= 90.0"
},
"NovaErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "nova"
},
"annotations": {
"description": "The average per-second rate of errors in Nova logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Nova logs"
},
"if": "sum(rate(log_messages{service=\"nova\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"ContrailXMPPSessionsDown": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail XMPP sessions on the {{ $labels.host }} node are down for 2 minutes.",
"summary": "OpenContrail XMPP sessions are down"
},
"for": "2m",
"if": "min(contrail_xmpp_session_down_count) by (host) > 0"
},
"NetdevBudgetRanOutsWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The rate of net_rx_action loops terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters.",
"summary": "CPU terminated 0.1 net_rx_action loops per second"
},
"for": "7m",
"if": "max(rate(nstat_time_squeeze[5m])) without (cpu) > 0.1"
},
"ContrailFlowsDiscardedTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The average per-second rate of discarded OpenContrail vRouter flows on the {{ $labels.host }} node is {{ $value }} for 2 minutes.",
"summary": "OpenContrail vRouter discarded flows reached the limit of 0.1/s"
},
"for": "2m",
"if": "rate(contrail_vrouter_flows_discard[5m]) >= 0.1"
},
"SystemLoadTooHighCritical": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is 2.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 2.0"
},
"SystemDiskInodesFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "85.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 85.0"
},
"NtpOffsetTooHigh": {
"labels": {
"severity": "warning",
"service": "ntp"
},
"annotations": {
"description": "The NTP offset on the {{ $labels.host }} node is {{ $value }}ms for 2 minutes.",
"summary": "NTP offset reached the limit of 200ms"
},
"for": "2m",
"if": "ntpq_offset >= 200"
},
"ContrailBGPSessionsNoActive": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "There are no active OpenContrail BGP sessions on the {{ $labels.host }} node for 2 minutes.",
"summary": "No active OpenContrail BGP sessions"
},
"for": "2m",
"if": "max(contrail_bgp_session_up_count) by (host) == 0"
},
"PacketsDroppedByCpuMinor": {
"labels": {
"severity": "minor",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 24 hours.",
"summary": "CPU dropped 0 packets"
},
"if": "floor(increase(nstat_packet_drop[24h])) > 0"
},
"ContrailProcessDown": {
"labels": {
"severity": "minor",
"service": "contrail"
},
"annotations": {
"description": "The {{ $labels.process_name }} process on the {{ $labels.host }} node is down.",
"summary": "{{ $labels.process_name }} process is down"
},
"if": "procstat_running{process_name=~\"contrail.*\"} == 0"
},
"SaltMinionServiceDown": {
"labels": {
"severity": "critical",
"service": "salt"
},
"annotations": {
"description": "The salt-minion service on the {{ $labels.host }} node is down.",
"summary": "Salt-minion service is down"
},
"if": "procstat_running{process_name=\"salt-minion\"} == 0"
},
"ContrailXMPPSessionsMissingEstablished": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} established OpenContrail XMPP sessions are missing on the compute cluster for 2 minutes.",
"summary": "Missing established OpenContrail XMPP sessions"
},
"for": "2m",
"if": "count(contrail_vrouter_xmpp) * 2 - sum(contrail_xmpp_session_up_count) > 0"
},
"ContrailProcessOutage": {
"labels": {
"severity": "critical",
"service": "contrail"
},
"annotations": {
"description": "All {{ $labels.process_name }} processes are down.",
"summary": "{{ $labels.process_name }} service outage"
},
"if": "count(procstat_running{process_name=~\"contrail.*\"} == 0) by (process_name) == count(procstat_running{process_name=~\"contrail.*\"}) by (process_name)"
},
"SystemDiskFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 85.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 85.0"
},
"SshFailedLoginsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} failed SSH login attempts on the {{ $labels.host }} node during the last 5 minutes.",
"summary": "5 failed SSH logins"
},
"if": "increase(failed_logins_total[5m]) > 5"
},
"ContrailFlowsNextHopInvalidTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The average per-second rate of OpenContrail vRouter flows with an invalid next hop on the {{ $labels.host }} node is {{ $value }} for 2 minutes.",
"summary": "OpenContrail vRouter flows with an invalid next hop reached the limit of 0.1/s"
},
"for": "2m",
"if": "rate(contrail_vrouter_flows_invalid_nh[5m]) >= 0.1"
},
"SystemDiskInodesFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "95.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 95.0"
},
"SystemMemoryFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "90.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 90.0"
},
"ContrailBGPSessionsDown": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail BGP sessions on the {{ $labels.host }} node are down for 2 minutes.",
"summary": "OpenContrail BGP sessions are down"
},
"for": "2m",
"if": "min(contrail_bgp_session_down_count) by (host) > 0"
},
"ContrailFlowsActiveTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter flows per second on the {{ $labels.host }} node are active for 2 minutes.",
"summary": "OpenContrail vRouter active flows reached the limit of 100"
},
"for": "2m",
"if": "deriv(contrail_vrouter_flows_active[5m]) >= 100"
},
"SystemTxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets transmitted by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "100 transmitted packets were dropped"
},
"if": "increase(net_drop_out[1m]) > 100"
},
"ContrailVrouterLLSSessionsChangesTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The OpenContrail vRouter LLS sessions on the {{ $labels.host }} node have changed {{ $value }} times.",
"summary": "OpenContrail vRouter LLS sessions changes reached the limit of 5"
},
"if": "abs(delta(contrail_vrouter_lls[2m])) >= 5"
},
"SystemDiskFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 95.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 95.0"
},
"ContrailProcessDownMinor": {
"labels": {
"severity": "minor",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} {{ $labels.process_name }} processes (>= 30.0%) are down.",
"summary": "30.0% of {{ $labels.process_name }} processes are down"
},
"if": "count(procstat_running{process_name=~\"contrail.*\"} == 0) by (process_name) >= 0.3*count(procstat_running{process_name=~\"contrail.*\"}) by (process_name)"
},
"SystemCpuFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The average CPU usage on the {{ $labels.host }} node is {{ $value }}% for 2 minutes.",
"summary": "90.0% CPU usage"
},
"for": "2m",
"if": "100 - avg_over_time(cpu_usage_idle{cpu=\"cpu-total\"}[5m]) > 90.0"
},
"ContrailFlowsInterfaceInvalidTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "The average per-second rate of OpenContrail vRouter flows with an invalid composite interface on the {{ $labels.host }} node is {{ $value }} for 2 minutes.",
"summary": "OpenContrail vRouter flows with an invalid composite interface reached the limit of 0.05/s"
},
"for": "2m",
"if": "rate(contrail_vrouter_flows_composite_invalid_interface[5m]) >= 0.05"
},
"ContrailFlowsTableFullTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had a full table for 2 minutes.",
"summary": "OpenContrail vRouter flows with full table reached the limit of 100"
},
"for": "2m",
"if": "min(contrail_vrouter_flows_flow_table_full) by (host) >= 100"
},
"ContrailXMPPSessionsTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail XMPP sessions on the {{ $labels.host }} node are open for 2 minutes.",
"summary": "OpenContrail XMPP sessions reached the limit of 500"
},
"for": "2m",
"if": "min(contrail_xmpp_session_count) by (host) >= 500"
},
"ContrailFlowsFragErrTooHigh": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "{{ $value }} OpenContrail vRouter flows on the {{ $labels.host }} node had fragment errors for 2 minutes.",
"summary": "OpenContrail vRouter flows with fragment errors reached the limit of 100"
},
"for": "2m",
"if": "min(contrail_vrouter_flows_frag_err) by (host) >= 100"
},
"SystemDiskErrorsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.device }} disk on the {{ $labels.host }} node is reporting errors for 5 minutes.",
"summary": "Disk {{ $labels.device }} is failing"
},
"for": "5m",
"if": "increase(hdd_errors_total[1m]) > 0"
},
"SystemSwapFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The swap on the {{ $labels.host }} node is {{ $value }}% used for 2 minutes.",
"summary": "50.0% of swap is used"
},
"for": "2m",
"if": "swap_used_percent >= 50.0"
},
"SystemMemoryFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "95.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 95.0"
},
"ContrailVrouterXMPPSessionsZero": {
"labels": {
"severity": "warning",
"service": "contrail"
},
"annotations": {
"description": "There are no OpenContrail vRouter XMPP sessions on the {{ $labels.host }} node for 2 minutes.",
"summary": "No OpenContrail vRouter XMPP sessions"
},
"for": "2m",
"if": "min(contrail_vrouter_xmpp) by (host) == 0"
}
}
}
},
"path": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"machine_id": "62cb8077993242e295fc27f611de39f6",
"salt": {
"graph": [
{
"host": "cmp1.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "ntp.client",
"relations": [
{
"host_external": "udp://10.10.0.15",
"direction": "source",
"type": "udp",
"service": "other-service"
},
{
"host_external": "udp://pool.ntp.org",
"direction": "source",
"type": "udp",
"service": "other-service"
}
]
},
{
"host": "cmp1.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "linux.system",
"relations": [
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//opencontrail-4.0//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-updates main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
}
]
},
{
"host": "cmp1.ozhurba-os-oc-cicd-sl.local",
"type": "software-config",
"service": "salt.minion",
"relations": [
{
"direction": "source",
"type": "tcp-0mq",
"service": "salt.master",
"host_from_target": "10.10.0.15"
}
]
}
]
}
},
"ctl01.fakedomain.local": {
"biosversion": "1.10.2-1.1~u16.04+mcp2",
"kernel": "Linux",
"domain": "ozhurba-os-oc-cicd-sl.local",
"uid": 0,
"zmqversion": "4.1.4",
"kernelrelease": "4.15.0-43-generic",
"pythonpath": [
"/usr/bin",
"/usr/lib/python2.7",
"/usr/lib/python2.7/plat-x86_64-linux-gnu",
"/usr/lib/python2.7/lib-tk",
"/usr/lib/python2.7/lib-old",
"/usr/lib/python2.7/lib-dynload",
"/usr/local/lib/python2.7/dist-packages",
"/usr/lib/python2.7/dist-packages"
],
"serialnumber": "a29a9e8f-3421-47ae-aaeb-1d02391f7e62",
"neutron_policy": {
"create_router:distributed": "rule:admin_only",
"get_policy_dscp_marking_rule": "rule:regular_user",
"shared_subnetpools": "field:subnetpools:shared=True",
"context_is_advsvc": "role:advsvc",
"get_floatingip": "rule:admin_or_owner",
"context_is_admin": "role:admin",
"update_router:external_gateway_info:network_id": "rule:admin_or_owner",
"update_port:port_security_enabled": "rule:context_is_advsvc or rule:admin_or_network_owner",
"admin_owner_or_network_owner": "rule:owner or rule:admin_or_network_owner",
"get_loadbalancer-pools": "rule:admin_only",
"get_service_provider": "rule:regular_user",
"create_network:router:external": "rule:admin_only",
"create_port:mac_learning_enabled": "rule:context_is_advsvc or rule:admin_or_network_owner",
"get_service_profiles": "rule:admin_only",
"create_qos_queue": "rule:admin_only",
"get_port": "rule:context_is_advsvc or rule:admin_owner_or_network_owner",
"delete_address_scope": "rule:admin_or_owner",
"delete_agent": "rule:admin_only",
"update_policy_dscp_marking_rule": "rule:admin_only",
"update_address_scope:shared": "rule:admin_only",
"create_router": "rule:regular_user",
"delete_metering_label_rule": "rule:admin_only",
"update_network:segments": "rule:admin_only",
"update_network:provider:segmentation_id": "rule:admin_only",
"create_network:segments": "rule:admin_only",
"delete_log": "rule:admin_only",
"get_policy_profiles": "",
"get_port:binding:host_id": "rule:admin_only",
"get_rule_type": "rule:regular_user",
"update_port:fixed_ips:ip_address": "rule:context_is_advsvc or rule:admin_or_network_owner",
"create_segment": "rule:admin_only",
"get_security_group_rule": "rule:admin_or_owner",
"update_port:binding:host_id": "rule:admin_only",
"delete_port": "rule:context_is_advsvc or rule:admin_owner_or_network_owner",
"delete_rbac_policy": "rule:admin_or_owner",
"get_policy_profile": "",
"update_network:router:external": "rule:admin_only",
"create_network:provider:segmentation_id": "rule:admin_only",
"get_subnet": "rule:admin_or_owner or rule:shared",
"create_port": "",
"create_subnet": "rule:admin_or_network_owner",
"update_policy": "rule:admin_only",
"get_network:queue_id": "rule:admin_only",
"get_policy": "rule:regular_user",
"update_network": "rule:admin_or_owner",
"delete_network": "rule:admin_or_owner",
"get_service_profile": "rule:admin_only",
"get_policy_minimum_bandwidth_rule": "rule:regular_user",
"update_subnet": "rule:admin_or_network_owner",
"update_router:ha": "rule:admin_only",
"update_subnet:service_types": "rule:admin_only",
"get_flavor": "rule:regular_user",
"create_policy_bandwidth_limit_rule": "rule:admin_only",
"create_subnetpool": "",
"get_metering_label_rule": "rule:admin_only",
"add_router_interface": "rule:admin_or_owner",
"create_router:external_gateway_info:external_fixed_ips": "rule:admin_only",
"update_rbac_policy:target_tenant": "rule:restrict_wildcard and rule:admin_or_owner",
"create_rbac_policy:target_tenant": "rule:restrict_wildcard",
"get_port:binding:vif_details": "rule:admin_only",
"get_router:ha": "rule:admin_only",
"update_port:device_owner": "not rule:network_device or rule:context_is_advsvc or rule:admin_or_network_owner",
"update_log": "rule:admin_only",
"update_subnetpool:is_default": "rule:admin_only",
"update_policy_minimum_bandwidth_rule": "rule:admin_only",
"get_subports": "",
"get_port:binding:vif_type": "rule:admin_only",
"shared": "field:networks:shared=True",
"update_security_group": "rule:admin_or_owner",
"get_logs": "rule:admin_only",
"get_agent": "rule:admin_only",
"create_floatingip:floating_ip_address": "rule:admin_only",
"delete_floatingip": "rule:admin_or_owner",
"delete_flavor_service_profile": "rule:admin_only",
"create_port:port_security_enabled": "rule:context_is_advsvc or rule:admin_or_network_owner",
"update_service_profile": "rule:admin_only",
"update_network:shared": "rule:admin_only",
"update_router:distributed": "rule:admin_only",
"create_port:device_owner": "not rule:network_device or rule:context_is_advsvc or rule:admin_or_network_owner",
"delete_policy": "rule:admin_only",
"regular_user": "",
"admin_or_network_owner": "rule:context_is_admin or tenant_id:%(network:tenant_id)s",
"update_segment": "rule:admin_only",
"get_network:segments": "rule:admin_only",
"create_network:provider:physical_network": "rule:admin_only",
"shared_address_scopes": "field:address_scopes:shared=True",
"get_network:provider:segmentation_id": "rule:admin_only",
"get_router": "rule:admin_or_owner",
"get_l3-agents": "rule:admin_only",
"get_router:distributed": "rule:admin_only",
"create_lsn": "rule:admin_only",
"create_port:fixed_ips:ip_address": "rule:context_is_advsvc or rule:admin_or_network_owner",
"update_network:provider:network_type": "rule:admin_only",
"create_metering_label": "rule:admin_only",
"create_metering_label_rule": "rule:admin_only",
"create_flavor_service_profile": "rule:admin_only",
"delete_security_group_rule": "rule:admin_or_owner",
"delete_policy_bandwidth_limit_rule": "rule:admin_only",
"create_l3-router": "rule:admin_only",
"create_policy_minimum_bandwidth_rule": "rule:admin_only",
"update_router": "rule:admin_or_owner",
"create_network:shared": "rule:admin_only",
"get_subnet:segment_id": "rule:admin_only",
"get_qos_queue": "rule:admin_only",
"create_dhcp-network": "rule:admin_only",
"get_dhcp-networks": "rule:admin_only",
"add_subports": "rule:admin_or_owner",
"create_port:binding:host_id": "rule:admin_only",
"update_router:external_gateway_info:enable_snat": "rule:admin_only",
"create_port:mac_address": "rule:context_is_advsvc or rule:admin_or_network_owner",
"owner": "tenant_id:%(tenant_id)s",
"get_network_ip_availabilities": "rule:admin_only",
"remove_router_interface": "rule:admin_or_owner",
"update_router:external_gateway_info": "rule:admin_or_owner",
"get_network:provider:physical_network": "rule:admin_only",
"get_policy_bandwidth_limit_rule": "rule:regular_user",
"create_router:external_gateway_info:enable_snat": "rule:admin_only",
"delete_l3-router": "rule:admin_only",
"create_security_group": "rule:admin_or_owner",
"delete_router": "rule:admin_or_owner",
"delete_policy_dscp_marking_rule": "rule:admin_only",
"get_rbac_policy": "rule:admin_or_owner",
"update_floatingip": "rule:admin_or_owner",
"delete_network_profile": "rule:admin_only",
"create_policy_dscp_marking_rule": "rule:admin_only",
"update_router:external_gateway_info:external_fixed_ips": "rule:admin_only",
"admin_only": "rule:context_is_admin",
"update_port:allowed_address_pairs": "rule:admin_or_network_owner",
"get_lsn": "rule:admin_only",
"update_address_scope": "rule:admin_or_owner",
"create_network:is_default": "rule:admin_only",
"external": "field:networks:router:external=True",
"get_network_profile": "",
"create_address_scope": "",
"create_floatingip": "rule:regular_user",
"get_loadbalancer-agent": "rule:admin_only",
"get_network:router:external": "rule:regular_user",
"create_address_scope:shared": "rule:admin_only",
"create_port:fixed_ips:subnet_id": "rule:context_is_advsvc or rule:admin_or_network_owner or rule:shared",
"create_network_profile": "rule:admin_only",
"delete_subnet": "rule:admin_or_network_owner",
"get_network_profiles": "",
"delete_trunk": "rule:admin_or_owner",
"create_network": "",
"get_auto_allocated_topology": "rule:admin_or_owner",
"get_network": "rule:admin_or_owner or rule:shared or rule:external or rule:context_is_advsvc",
"create_security_group_rule": "rule:admin_or_owner",
"get_loadbalancer-hosting-agent": "rule:admin_only",
"delete_security_group": "rule:admin_or_owner",
"get_network:provider:network_type": "rule:admin_only",
"create_trunk": "rule:regular_user",
"update_policy_bandwidth_limit_rule": "rule:admin_only",
"get_network_ip_availability": "rule:admin_only",
"get_metering_label": "rule:admin_only",
"restrict_wildcard": "(not field:rbac_policy:target_tenant=*) or rule:admin_only",
"update_agent": "rule:admin_only",
"create_flavor": "rule:admin_only",
"create_port:binding:profile": "rule:admin_only",
"get_port:binding:profile": "rule:admin_only",
"update_port:fixed_ips:subnet_id": "rule:context_is_advsvc or rule:admin_or_network_owner or rule:shared",
"create_network:provider:network_type": "rule:admin_only",
"update_port:data_plane_status": "rule:admin_or_data_plane_int",
"create_port:allowed_address_pairs": "rule:admin_or_network_owner",
"create_router:ha": "rule:admin_only",
"update_network_profile": "rule:admin_only",
"delete_dhcp-network": "rule:admin_only",
"create_policy": "rule:admin_only",
"create_log": "rule:admin_only",
"update_port:mac_learning_enabled": "rule:context_is_advsvc or rule:admin_or_network_owner",
"update_port": "rule:admin_or_owner or rule:context_is_advsvc",
"get_security_groups": "rule:admin_or_owner",
"admin_or_data_plane_int": "rule:context_is_admin or role:data_plane_integrator",
"default": "rule:admin_or_owner",
"get_trunk": "rule:admin_or_owner",
"network_device": "field:port:device_owner=~^network:",
"delete_policy_minimum_bandwidth_rule": "rule:admin_only",
"get_port:queue_id": "rule:admin_only",
"update_port:binding:profile": "rule:admin_only",
"remove_subports": "rule:admin_or_owner",
"get_loggable_resources": "rule:admin_only",
"update_network:provider:physical_network": "rule:admin_only",
"get_flavor_service_profile": "rule:regular_user",
"get_address_scope": "rule:admin_or_owner or rule:shared_address_scopes",
"update_flavor": "rule:admin_only",
"get_security_group_rules": "rule:admin_or_owner",
"get_security_group": "rule:admin_or_owner",
"delete_flavor": "rule:admin_only",
"create_subnetpool:is_default": "rule:admin_only",
"update_policy_profiles": "rule:admin_only",
"get_subnetpool": "rule:admin_or_owner or rule:shared_subnetpools",
"create_subnet:service_types": "rule:admin_only",
"get_l3-routers": "rule:admin_only",
"create_rbac_policy": "",
"delete_metering_label": "rule:admin_only",
"update_subnetpool": "rule:admin_or_owner",
"get_dhcp-agents": "rule:admin_only",
"get_log": "rule:admin_only",
"get_flavors": "rule:regular_user",
"create_subnetpool:shared": "rule:admin_only",
"get_agent-loadbalancers": "rule:admin_only",
"delete_subnetpool": "rule:admin_or_owner",
"update_port:mac_address": "rule:admin_only or rule:context_is_advsvc",
"get_segment": "rule:admin_only",
"delete_service_profile": "rule:admin_only",
"create_subnet:segment_id": "rule:admin_only",
"create_service_profile": "rule:admin_only",
"delete_segment": "rule:admin_only",
"update_rbac_policy": "rule:admin_or_owner",
"admin_or_owner": "rule:context_is_admin or rule:owner"
},
"telegraf": {
"remote_agent": {
"input": {
"openstack": {
"username": "admin",
"cpu_ratio": "16.0",
"region": "RegionOne",
"interval": "3m",
"project": "admin",
"monitor_agents": "true",
"password": "lkgQzExIrlxueh57sLEm1vkcOfQgDuWh",
"tenant": "admin",
"identity_endpoint": "http://10.11.0.10:35357/"
}
},
"processor": {},
"dir": {
"config": "/srv/volumes/local/telegraf",
"config_d": "/srv/volumes/local/telegraf/telegraf.d"
},
"output": {
"prometheus_client_openstack": {
"engine": "prometheus",
"bind": {
"port": "9127",
"address": "0.0.0.0"
},
"expiration_interval": "6m",
"namepass": [
"openstack*"
],
"template": "telegraf/files/output/prometheus_client.conf"
},
"prometheus_client": {
"namedrop": [
"openstack*"
]
}
}
},
"agent": {
"metric_batch_size": 1000,
"collection_jitter": 2,
"interval": 15,
"enabled": true,
"pkgs": [
"telegraf"
],
"round_interval": false,
"output": {
"prometheus_client": {
"engine": "prometheus",
"bind": {
"port": 9126,
"address": "0.0.0.0"
},
"string_as_label": false
}
},
"input": {
"haproxy": {
"fielddrop": [
"addr",
"agent_status",
"check_status",
"cookie",
"last_agt",
"last_chk",
"mode",
"status",
"tracked"
],
"servers": [
"/run/haproxy/admin.sock"
]
},
"kernel": null,
"processes": null,
"nstat": {
"fieldpass": [
"packet_drop",
"time_squeeze"
]
},
"ipcheck": {
"ips": [
"10.11.0.10"
],
"interface_name_regexp_exclude": "^docker.*",
"template": "keepalived/files/telegraf.conf"
},
"x509": {
"sources": [
"/srv/salt/pki/ozhurba-os-oc-cicd-sl/10.13.250.9.crt"
]
},
"ntp": {
"template": "ntp/files/telegraf.conf"
},
"mem": null,
"keepalived": {
"template": "keepalived/files/vrrp_telegraf.conf"
},
"system": null,
"http_listener": {
"read_timeout": "10s",
"bind": {
"port": 8186,
"address": "127.0.0.1"
},
"tagexclude": [
"hostname"
],
"write_timeout": "10s"
},
"swap": null,
"linux_sysctl_fs": null,
"diskio": null,
"procstat": {
"process": {
"sshd": {
"exe": "sshd"
},
"salt-minion": {
"pattern": "salt-minion"
},
"keepalived": {
"exe": "keepalived"
},
"cron": {
"exe": "cron"
},
"memcached": {
"exe": "memcached"
},
"ntpd": {
"exe": "ntpd"
}
}
},
"apache": {
"urls": [
"http://127.0.0.1:80/server-status?auto"
],
"template": "apache/files/telegraf.conf"
},
"net": null,
"disk": {
"ignore_fs": [
"aufs",
"rootfs",
"sysfs",
"proc",
"devtmpfs",
"devpts",
"tmpfs",
"fusectl",
"cgroup",
"overlay"
]
},
"memcached": {
"servers": [
{
"port": 11211,
"address": "127.0.0.1"
}
]
},
"cpu": {
"totalcpu": true,
"percpu": false
},
"http_response": {
"glance-api": {
"expected_code": 300,
"address": "http://10.11.0.11:9292/"
},
"cinder-api": {
"expected_code": 300,
"address": "http://10.11.0.11:8776/"
},
"heat-cfn-api": {
"expected_code": 300,
"address": "http://10.11.0.11:8000/"
},
"nova-api": {
"expected_code": 200,
"address": "http://10.11.0.11:8774/"
},
"neutron-api": {
"expected_code": 200,
"address": "http://10.11.0.11:9696/"
},
"keystone-public-api": {
"expected_code": 300,
"address": "http://10.11.0.11:5000/"
},
"glance-registry": {
"expected_code": 401,
"address": "http://10.11.0.11:9191/"
},
"heat-api": {
"expected_code": 300,
"address": "http://10.11.0.11:8004/"
},
"keystone-admin-api": {
"expected_code": 300,
"address": "http://10.11.0.11:35357/"
}
}
},
"metric_buffer_limit": 10000,
"processor": {},
"dir": {
"config": "/etc/telegraf",
"config_d": "/etc/telegraf/telegraf.d"
}
}
},
"ip_interfaces": {
"ens4": [
"10.10.100.7",
"fe80::f816:3eff:fe4e:f3c8"
],
"lo": [
"127.0.0.1",
"::1"
],
"ens6": [
"10.13.100.22",
"fe80::f816:3eff:fe06:f3b"
],
"ens5": [
"10.12.100.14",
"fe80::f816:3eff:fedb:db4f"
],
"ens3": [
"10.11.0.11",
"fe80::f816:3eff:fe0c:1532"
]
},
"groupname": "root",
"fqdn_ip6": [],
"mem_total": 9992,
"saltversioninfo": [
2017,
7,
8,
0
],
"SSDs": [],
"mdadm": [],
"id": "ctl01.ozhurba-os-oc-cicd-sl.local",
"manufacturer": "OpenStack Foundation",
"osrelease": "16.04",
"ps": "ps -efHww",
"systemd": {
"version": "229",
"features": "+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ -LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN"
},
"fqdn": "ctl01.ozhurba-os-oc-cicd-sl.local",
"uuid": "b7410784-ac6d-44c3-a9c3-e2ae2aaad69e",
"ip6_interfaces": {
"ens4": [
"fe80::f816:3eff:fe4e:f3c8"
],
"lo": [
"::1"
],
"ens6": [
"fe80::f816:3eff:fe06:f3b"
],
"ens5": [
"fe80::f816:3eff:fedb:db4f"
],
"ens3": [
"fe80::f816:3eff:fe0c:1532"
]
},
"num_cpus": 4,
"hwaddr_interfaces": {
"ens4": "fa:16:3e:4e:f3:c8",
"lo": "00:00:00:00:00:00",
"ens6": "fa:16:3e:06:0f:3b",
"ens5": "fa:16:3e:db:db:4f",
"ens3": "fa:16:3e:0c:15:32"
},
"init": "systemd",
"ip4_interfaces": {
"ens4": [
"10.10.100.7"
],
"lo": [
"127.0.0.1"
],
"ens6": [
"10.13.100.22"
],
"ens5": [
"10.12.100.14"
],
"ens3": [
"10.11.0.11"
]
},
"grafana": {
"dashboard": {
"openstack_tenants_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/openstack_tenants_prometheus.json",
"format": "json"
},
"neutron_influxdb": {
"datasource": "influxdb",
"template": "neutron/files/grafana_dashboards/neutron_influxdb.json",
"format": "json"
},
"ntp_prometheus": {
"datasource": "prometheus",
"template": "ntp/files/grafana_dashboards/ntp_prometheus.json",
"format": "json"
},
"heat_prometheus": {
"datasource": "prometheus",
"template": "heat/files/grafana_dashboards/heat_prometheus_fluentd.json",
"format": "json"
},
"cinder_prometheus": {
"datasource": "prometheus",
"template": "cinder/files/grafana_dashboards/cinder_prometheus_fluentd.json",
"format": "json"
},
"neutron_prometheus": {
"datasource": "prometheus",
"template": "neutron/files/grafana_dashboards/neutron_prometheus_fluentd.json",
"format": "json"
},
"glance_prometheus": {
"datasource": "prometheus",
"template": "glance/files/grafana_dashboards/glance_prometheus_fluentd.json",
"format": "json"
},
"hypervisor_influxdb": {
"datasource": "influxdb",
"template": "nova/files/grafana_dashboards/hypervisor_influxdb.json",
"format": "json"
},
"nova_utilization_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_utilization_prometheus.json",
"format": "json"
},
"haproxy_prometheus": {
"datasource": "prometheus",
"template": "haproxy/files/grafana_dashboards/haproxy_prometheus.json",
"format": "json"
},
"glusterfs_prometheus": {
"datasource": "prometheus",
"template": "glusterfs/files/grafana_dashboards/glusterfs_prometheus.json",
"format": "json"
},
"apache_influxdb": {
"datasource": "influxdb",
"template": "apache/files/grafana_dashboards/apache_influxdb.json",
"format": "json"
},
"glusterfs_influxdb": {
"datasource": "influxdb",
"template": "glusterfs/files/grafana_dashboards/glusterfs_influxdb.json",
"format": "json"
},
"cinder_influxdb": {
"datasource": "influxdb",
"template": "cinder/files/grafana_dashboards/cinder_influxdb.json",
"format": "json"
},
"openstack_overview_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/openstack_overview_prometheus.json",
"format": "json"
},
"linux_network_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_network_prometheus.json",
"format": "json"
},
"keystone_influxdb": {
"datasource": "influxdb",
"template": "keystone/files/grafana_dashboards/keystone_influxdb.json",
"format": "json"
},
"linux_disk_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_disk_prometheus.json",
"format": "json"
},
"service_level": {
"datasource": "influxdb",
"row": {
"neutron-service-level": {
"title": "Neutron Service Levels",
"panel": {
"neutron-control-uptime": {
"target": {
"neutron-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-control' AND value = 4 AND $timeFilter"
},
"neutron-control-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-control' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"neutron-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-control' AND value = 0 AND $timeFilter"
}
},
"title": "Neutron Control Uptime"
},
"neutron-api-requests": {
"target": {
"neutron-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_neutron_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
},
"neutron-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_neutron_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
},
"neutron-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_neutron_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
}
},
"title": "Neutron API Requests"
},
"neutron-data-uptime": {
"target": {
"neutron-data-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-data' AND value = 0 AND $timeFilter"
},
"neutron-data-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-data' AND value = 4 AND $timeFilter"
},
"neutron-data-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'neutron-data' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
}
},
"title": "Neutron Data Uptime"
},
"neutron-api-availability": {
"target": {
"neutron-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'neutron-api' AND value = 1 AND $timeFilter"
},
"neutron-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'neutron-api' AND value = 0 AND $timeFilter"
}
},
"title": "Neutron API Availability"
}
}
},
"heat-service-level": {
"title": "Heat Service Levels",
"panel": {
"heat-api-requests": {
"target": {
"heat-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_heat_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
},
"heat-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_heat_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
},
"heat-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_heat_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
}
},
"title": "Heat API Requests"
},
"heat-control-uptime": {
"target": {
"heat-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'heat' AND value = 4 AND $timeFilter"
},
"heat-control-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'heat' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"heat-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'heat' AND value = 0 AND $timeFilter"
}
},
"title": "Heat Control Uptime"
},
"heat-api-availability": {
"target": {
"heat-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'heat-api' OR service = 'heat-cfn-api') AND value = 0 AND $timeFilter"
},
"heat-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'heat-api' OR service = 'heat-cfn-api') AND value = 1 AND $timeFilter"
}
},
"title": "Heat API Availability (including cfn-api)"
}
}
},
"cinder-service-level": {
"title": "Cinder Service Levels",
"panel": {
"cinder-api-requests": {
"target": {
"cinder-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
},
"cinder-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
},
"cinder-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_cinder_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
}
},
"title": "Cinder API Requests"
},
"cinder-control-uptime": {
"target": {
"cinder-control-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"cinder-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND value = 0 AND $timeFilter"
},
"cinder-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-control' AND value = 4 AND $timeFilter"
}
},
"title": "Cinder Control Uptime"
},
"cinder-data-uptime": {
"target": {
"cinder-data-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND value = 0 AND $timeFilter"
},
"cinder-data-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND value = 4 AND $timeFilter"
},
"cinder-data-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'cinder-data' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
}
},
"title": "Cinder Data Uptime"
},
"cinder-api-availability": {
"target": {
"cinder-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'cinder-api' OR service = 'cinder-v2-api') AND value = 1 AND $timeFilter"
},
"cinder-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND (service = 'cinder-api' OR service = 'cinder-v2-api') AND value = 0 AND $timeFilter"
}
},
"title": "Cinder API Availability (V1 & V2)"
}
}
},
"keystone-service-level": {
"title": "Keystone Service Levels",
"panel": {
"keystone-control-uptime": {
"target": {
"keystone-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'keystone' AND value = 0 AND $timeFilter"
},
"keystone-control-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'keystone' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"keystone-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'keystone' AND value = 4 AND $timeFilter"
}
},
"title": "Keystone Control Uptime"
},
"keystone-api-availability": {
"target": {
"keystone-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'keystone-public-api' AND value = 0 AND $timeFilter"
},
"keystone-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'keystone-public-api' AND value = 1 AND $timeFilter"
}
},
"title": "Keystone API Availability"
}
}
},
"nova-service-level": {
"title": "Nova Service Levels",
"panel": {
"nova-control-uptime": {
"target": {
"nova-control-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND value = 0 AND $timeFilter"
},
"nova-control-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND value = 4 AND $timeFilter"
},
"nova-control-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-control' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
}
},
"title": "Nova Control Uptime"
},
"nova-data-uptime": {
"target": {
"nova-data-healthy-status": {
"alias": "Healthy",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND value = 0 AND $timeFilter"
},
"nova-data-degraded-unknwon-status": {
"alias": "Degraded or Unknwon",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND (value = 1 OR value = 2 OR value = 3) AND $timeFilter"
},
"nova-data-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM cluster_status WHERE environment_label = '$environment' AND cluster_name = 'nova-data' AND value = 4 AND $timeFilter"
}
},
"title": "Nova Data Uptime"
},
"nova-api-availability": {
"target": {
"nova-api-down-status": {
"alias": "Down",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'nova-api' AND value = 0 AND $timeFilter"
},
"nova-api-up-status": {
"alias": "Up",
"rawQuery": true,
"query": "SELECT count(value) FROM openstack_check_api WHERE environment_label = '$environment' AND service = 'nova-api' AND value = 1 AND $timeFilter"
}
},
"title": "Nova API Availability"
},
"nova-api-requests": {
"target": {
"nova-api-okay-status": {
"alias": "Okay",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND (http_status = '2xx' OR http_status = '3xx') AND $timeFilter"
},
"nova-api-error-status": {
"alias": "Error",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND http_status = '4xx' AND $timeFilter"
},
"nova-api-fatal-status": {
"alias": "Fatal",
"rawQuery": true,
"query": "SELECT count(max) FROM openstack_nova_http_response_times WHERE environment_label = '$environment' AND http_status = '5xx' AND $timeFilter"
}
},
"title": "Nova API Requests"
}
}
}
}
},
"linux_influxdb": {
"datasource": "influxdb",
"template": "linux/files/grafana_dashboards/system_influxdb.json",
"format": "json"
},
"memcached_prometheus": {
"datasource": "prometheus",
"template": "memcached/files/grafana_dashboards/memcached_prometheus.json",
"format": "json"
},
"nova_instances_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_instances_prometheus.json",
"format": "json"
},
"apache_prometheus": {
"datasource": "prometheus",
"template": "apache/files/grafana_dashboards/apache_prometheus.json",
"format": "json"
},
"nova_influxdb": {
"datasource": "influxdb",
"template": "nova/files/grafana_dashboards/nova_influxdb.json",
"format": "json"
},
"main_prometheus": {
"datasource": "prometheus",
"row": {
"ost-middleware": {
"title": "Middleware",
"panel": {
"apache": {
"target": {
"cluster_status": {
"expr": "avg(apache_up) by (name)"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Apache",
"title": "Apache"
}
],
"title": "Apache"
},
"haproxy": {
"target": {
"cluster_status": {
"expr": "avg(haproxy_up) by (name)"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "HAProxy",
"title": "HAProxy"
}
],
"title": "HAProxy"
},
"glusterfs": {
"target": {
"cluster_status": {
"expr": "avg(glusterfs_up) by (name)"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "GlusterFS",
"title": "GlusterFS"
}
],
"title": "GlusterFS"
},
"memcached": {
"target": {
"cluster_status": {
"expr": "avg(memcached_up) by (name)"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Memcached",
"title": "Memcached"
}
],
"title": "Memcached"
}
}
},
"ost-control-plane": {
"title": "OpenStack Control Plane",
"panel": {
"nova": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=\"nova\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
},
"heat": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"heat.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Heat",
"title": "Heat"
}
],
"title": "Heat"
},
"keystone": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"keystone.*public.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Keystone",
"title": "Keystone"
}
],
"title": "Keystone"
},
"cinder": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"cinder.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"glance": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"glance.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Glance",
"title": "Glance"
}
],
"title": "Glance"
},
"neutron": {
"target": {
"cluster_status": {
"expr": "avg(openstack_api_check_status{service=~\"neutron.*\"})"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Neutron",
"title": "Neutron"
}
],
"title": "Neutron"
}
}
}
}
},
"haproxy_influxdb": {
"datasource": "influxdb",
"template": "haproxy/files/grafana_dashboards/haproxy_influxdb.json",
"format": "json"
},
"heat_influxdb": {
"datasource": "influxdb",
"template": "heat/files/grafana_dashboards/heat_influxdb.json",
"format": "json"
},
"keepalived_prometheus": {
"datasource": "prometheus",
"template": "keepalived/files/grafana_dashboards/keepalived_prometheus.json",
"format": "json"
},
"nova_hypervisor_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_hypervisor_prometheus.json",
"format": "json"
},
"main_influxdb": {
"datasource": "influxdb",
"row": {
"ost-data-plane": {
"title": "OpenStack Data Plane",
"panel": {
"cinder": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'cinder-data' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"nova": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'nova-data' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
}
}
},
"ost-middleware": {
"title": "Middleware",
"panel": {
"apache": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'apache' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Apache",
"title": "Apache"
}
],
"title": "Apache"
},
"haproxy": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'haproxy-openstack' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "HAProxy",
"title": "HAProxy"
}
],
"title": "HAProxy"
},
"memcached": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'memcached' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Memcached",
"title": "Memcached"
}
],
"title": "Memcached"
}
}
},
"ost-control-plane": {
"title": "OpenStack Control Plane",
"panel": {
"nova": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'nova-control' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Nova",
"title": "Nova"
}
],
"title": "Nova"
},
"heat": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'heat' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Heat",
"title": "Heat"
}
],
"title": "Heat"
},
"keystone": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'keystone' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Keystone",
"title": "Keystone"
}
],
"title": "Keystone"
},
"cinder": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'cinder-control' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Cinder",
"title": "Cinder"
}
],
"title": "Cinder"
},
"glance": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'glance' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Glance",
"title": "Glance"
}
],
"title": "Glance"
},
"neutron": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'neutron-control' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Neutron",
"title": "Neutron"
}
],
"title": "Neutron"
}
}
}
}
},
"keystone_prometheus": {
"datasource": "prometheus",
"template": "keystone/files/grafana_dashboards/keystone_prometheus_fluentd.json",
"format": "json"
},
"glance_influxdb": {
"datasource": "influxdb",
"template": "glance/files/grafana_dashboards/glance_influxdb.json",
"format": "json"
},
"nova_overview_prometheus": {
"datasource": "prometheus",
"template": "nova/files/grafana_dashboards/nova_overview_prometheus.json",
"format": "json"
},
"memcached_influxdb": {
"datasource": "influxdb",
"template": "memcached/files/grafana_dashboards/memcached_influxdb.json",
"format": "json"
},
"linux_overview_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_overview_prometheus.json",
"format": "json"
}
},
"parameters": null
},
"ssh_fingerprints": {
"rsa": "90:35:c6:81:d9:59:58:86:4a:ba:a0:f4:dd:cc:c6:e3",
"ecdsa": "c1:30:3a:40:42:a8:cc:40:a7:3b:9a:5b:35:d5:4e:f1",
"dsa": "a2:7a:52:8c:8b:4f:ee:68:99:69:4a:ae:a0:c9:e8:e0"
},
"gid": 0,
"master": "10.10.0.15",
"ipv4": [
"10.10.100.7",
"10.11.0.11",
"10.12.100.14",
"10.13.100.22",
"127.0.0.1"
],
"dns": {
"domain": "",
"sortlist": [],
"nameservers": [
"172.18.176.6"
],
"ip4_nameservers": [
"172.18.176.6"
],
"search": [
"openstacklocal"
],
"ip6_nameservers": [],
"options": []
},
"ipv6": [
"::1",
"fe80::f816:3eff:fe06:f3b",
"fe80::f816:3eff:fe0c:1532",
"fe80::f816:3eff:fe4e:f3c8",
"fe80::f816:3eff:fedb:db4f"
],
"cinder_policy": {
"volume:create_volume_metadata": "rule:admin_or_owner",
"volume:delete_snapshot_metadata": "rule:admin_or_owner",
"volume:failover_host": "rule:admin_api",
"volume_extension:capabilities": "rule:admin_api",
"volume_extension:volume_admin_actions:force_detach": "rule:admin_api",
"backup:delete": "rule:admin_or_owner",
"volume_extension:volume_encryption_metadata": "rule:admin_or_owner",
"clusters:get": "rule:admin_api",
"message:get": "rule:admin_or_owner",
"volume_extension:quotas:update": "rule:admin_api",
"snapshot_extension:snapshot_actions:update_snapshot_status": "",
"scheduler_extension:scheduler_stats:get_pools": "rule:admin_api",
"backup:restore": "rule:admin_or_owner",
"volume_extension:volume_tenant_attribute": "rule:admin_or_owner",
"volume_extension:services:update": "rule:admin_api",
"volume_extension:snapshot_admin_actions:reset_status": "rule:admin_api",
"group:create_group_snapshot": "",
"volume_extension:volume_admin_actions:force_delete": "rule:admin_api",
"volume:update_readonly_flag": "rule:admin_or_owner",
"backup:backup-export": "rule:admin_api",
"group:group_types_manage": "rule:admin_api",
"volume:create_from_image": "",
"backup:backup-import": "rule:admin_api",
"backup:get_all": "rule:admin_or_owner",
"group:delete": "rule:admin_or_owner",
"volume_extension:types_manage": "rule:admin_api",
"group:get_all_group_snapshots": "rule:admin_or_owner",
"volume:update_volume_metadata": "rule:admin_or_owner",
"volume:accept_transfer": "",
"default": "rule:admin_or_owner",
"volume_extension:snapshot_admin_actions:force_delete": "rule:admin_api",
"volume:force_delete": "rule:admin_api",
"volume_extension:volume_mig_status_attribute": "rule:admin_api",
"admin_api": "is_admin:True or (role:admin and is_admin_project:True)",
"consistencygroup:get_all": "group:nobody",
"volume_extension:quota_classes": "rule:admin_api",
"backup:create": "",
"volume:get_all": "rule:admin_or_owner",
"snapshot_extension:snapshot_unmanage": "rule:admin_api",
"volume_extension:volume_image_metadata": "rule:admin_or_owner",
"volume:update_snapshot_metadata": "rule:admin_or_owner",
"volume_extension:volume_type_access:removeProjectAccess": "rule:admin_api",
"group:create": "",
"volume:extend": "rule:admin_or_owner",
"volume:get_snapshot_metadata": "rule:admin_or_owner",
"volume_extension:volume_admin_actions:migrate_volume_completion": "rule:admin_api",
"consistencygroup:create": "group:nobody",
"workers:cleanup": "rule:admin_api",
"volume:get_transfer": "rule:admin_or_owner",
"group:reset_status": "rule:admin_api",
"group:get": "rule:admin_or_owner",
"group:update": "rule:admin_or_owner",
"volume_extension:volume_manage": "rule:admin_api",
"volume:get_snapshot": "rule:admin_or_owner",
"volume:create": "",
"volume:update_snapshot": "rule:admin_or_owner",
"volume_extension:quotas:show": "",
"volume_extension:hosts": "rule:admin_api",
"group:update_group_snapshot": "rule:admin_or_owner",
"volume_extension:volume_type_access": "rule:admin_or_owner",
"volume:get_all_snapshots": "rule:admin_or_owner",
"group:get_group_snapshot": "rule:admin_or_owner",
"volume:get_all_transfers": "rule:admin_or_owner",
"volume:freeze_host": "rule:admin_api",
"consistencygroup:get": "group:nobody",
"consistencygroup:create_cgsnapshot": "group:nobody",
"volume_extension:types_extra_specs": "rule:admin_api",
"volume:get": "rule:admin_or_owner",
"volume:get_volume_metadata": "rule:admin_or_owner",
"volume_extension:backup_admin_actions:force_delete": "rule:admin_api",
"consistencygroup:update": "group:nobody",
"group:access_group_types_specs": "rule:admin_api",
"volume_extension:volume_unmanage": "rule:admin_api",
"volume_extension:backup_admin_actions:reset_status": "rule:admin_api",
"group:get_all": "rule:admin_or_owner",
"group:reset_group_snapshot_status": "rule:admin_api",
"volume:update_volume_admin_metadata": "rule:admin_api",
"clusters:update": "rule:admin_api",
"admin_or_owner": "is_admin:True or (role:admin and is_admin_project:True) or project_id:%(project_id)s",
"volume_extension:volume_admin_actions:reset_status": "rule:admin_api",
"volume_extension:volume_actions:upload_image": "rule:admin_or_owner",
"volume_extension:volume_actions:upload_public": "rule:admin_api",
"volume_extension:list_manageable": "rule:admin_api",
"volume_extension:extended_snapshot_attributes": "rule:admin_or_owner",
"volume:delete_transfer": "rule:admin_or_owner",
"volume:create_snapshot": "rule:admin_or_owner",
"snapshot_extension:list_manageable": "rule:admin_api",
"volume_extension:quotas:delete": "rule:admin_api",
"consistencygroup:delete": "group:nobody",
"consistencygroup:delete_cgsnapshot": "group:nobody",
"volume_extension:volume_host_attribute": "rule:admin_api",
"volume:delete_volume_metadata": "rule:admin_or_owner",
"backup:get": "rule:admin_or_owner",
"backup:backup_project_attribute": "rule:admin_api",
"volume:create_transfer": "rule:admin_or_owner",
"volume:retype": "rule:admin_or_owner",
"clusters:get_all": "rule:admin_api",
"volume_extension:quota_classes:validate_setup_for_nested_quota_use": "rule:admin_api",
"backup:update": "rule:admin_or_owner",
"volume:delete_snapshot": "rule:admin_or_owner",
"snapshot_extension:snapshot_manage": "rule:admin_api",
"volume_extension:volume_admin_actions:migrate_volume": "rule:admin_api",
"volume_extension:services:index": "rule:admin_api",
"volume:delete": "rule:admin_or_owner",
"volume_extension:access_types_extra_specs": "rule:admin_api",
"volume_extension:volume_type_encryption": "rule:admin_api",
"consistencygroup:get_all_cgsnapshots": "group:nobody",
"group:group_type_access": "rule:admin_or_owner",
"message:delete": "rule:admin_or_owner",
"message:get_all": "rule:admin_or_owner",
"group:group_types_specs": "rule:admin_api",
"volume_extension:access_types_qos_specs_id": "rule:admin_api",
"volume:update": "rule:admin_or_owner",
"consistencygroup:get_cgsnapshot": "group:nobody",
"group:delete_group_snapshot": "rule:admin_or_owner",
"volume:get_volume_admin_metadata": "rule:admin_api",
"volume_extension:volume_type_access:addProjectAccess": "rule:admin_api",
"volume:thaw_host": "rule:admin_api"
},
"server_id": 82054315,
"cpu_flags": [
"fpu",
"vme",
"de",
"pse",
"tsc",
"msr",
"pae",
"mce",
"cx8",
"apic",
"sep",
"mtrr",
"pge",
"mca",
"cmov",
"pat",
"pse36",
"clflush",
"mmx",
"fxsr",
"sse",
"sse2",
"ss",
"syscall",
"nx",
"pdpe1gb",
"rdtscp",
"lm",
"constant_tsc",
"arch_perfmon",
"rep_good",
"nopl",
"xtopology",
"cpuid",
"pni",
"pclmulqdq",
"vmx",
"ssse3",
"fma",
"cx16",
"pcid",
"sse4_1",
"sse4_2",
"x2apic",
"movbe",
"popcnt",
"tsc_deadline_timer",
"aes",
"xsave",
"avx",
"f16c",
"rdrand",
"hypervisor",
"lahf_lm",
"abm",
"3dnowprefetch",
"cpuid_fault",
"invpcid_single",
"pti",
"ssbd",
"ibrs",
"ibpb",
"tpr_shadow",
"vnmi",
"flexpriority",
"ept",
"vpid",
"fsgsbase",
"tsc_adjust",
"bmi1",
"hle",
"avx2",
"smep",
"bmi2",
"erms",
"invpcid",
"rtm",
"rdseed",
"adx",
"smap",
"xsaveopt",
"arat"
],
"osfullname": "Ubuntu",
"localhost": "ctl01",
"lsb_distrib_id": "Ubuntu",
"username": "root",
"fqdn_ip4": [
"10.11.0.11"
],
"shell": "/bin/sh",
"nodename": "ctl01",
"saltversion": "2017.7.8",
"lsb_distrib_release": "16.04",
"pid": 1775,
"saltpath": "/usr/lib/python2.7/dist-packages/salt",
"pythonversion": [
2,
7,
12,
"final",
0
],
"host": "ctl01",
"os_family": "Debian",
"oscodename": "xenial",
"services": [
"fluentd",
"telegraf",
"glusterfs",
"rsyslog",
"linux",
"glance",
"keepalived",
"nova",
"grafana",
"keystone",
"memcached",
"openscap",
"neutron",
"ntp",
"nginx",
"heat",
"apache",
"haproxy",
"openssh",
"opencontrail",
"logrotate",
"_reclass_",
"prometheus",
"cinder",
"salt"
],
"osfinger": "Ubuntu-16.04",
"biosreleasedate": "04/01/2014",
"dns_records": [
{
"names": [
"ctl01.ozhurba-os-oc-cicd-sl.local",
"ctl01"
],
"address": "10.11.0.11"
}
],
"lsb_distrib_description": "Ubuntu 16.04.5 LTS",
"sphinx": {
"doc": {
"haproxy": {
"role": {
"proxy": {
"endpoint": {
"glance_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "glance_api",
"address": "http://10.11.0.10:9292/"
},
"nova_metadata_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "nova_metadata_api",
"address": "http://10.11.0.10:8775/"
},
"heat_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "heat_api",
"address": "http://10.11.0.10:8004/"
},
"cinder_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "cinder_api",
"address": "http://10.11.0.10:8776/"
},
"heat_cloudwatch_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "heat_cloudwatch_api",
"address": "http://10.11.0.10:8003/"
},
"heat_cfn_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "heat_cfn_api",
"address": "http://10.11.0.10:8000/"
},
"nova_novnc_general-service": {
"type": "general-service",
"protocol": "http",
"name": "nova_novnc",
"address": "http://10.11.0.10:6080/"
},
"placement_api_http": {
"type": "http",
"protocol": "http",
"mode": "http",
"name": "placement_api",
"address": "http://10.11.0.10:8778/"
},
"keystone_public_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "keystone_public_api",
"address": "http://10.11.0.10:5000/"
},
"keystone_admin_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "keystone_admin_api",
"address": "http://10.11.0.10:35357/"
},
"glance_registry_api_general-service": {
"type": "general-service",
"protocol": "http",
"name": "glance_registry_api",
"address": "http://10.11.0.10:9191/"
},
"neutron_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "neutron_api",
"address": "http://10.11.0.10:9696/"
},
"nova_api_openstack-service": {
"type": "openstack-service",
"protocol": "http",
"name": "nova_api",
"address": "http://10.11.0.10:8774/"
}
},
"name": "proxy",
"param": {
"version": {
"value": "1.6.3 2015/12/25 Copyright 2000-2015 Willy Tarreau <willy@haproxy.org>"
}
}
}
},
"description": "The Reliable, High Performance TCP/HTTP Load Balancer.",
"name": "HAProxy"
},
"keepalived": {
"role": {
"cluster": {
"name": "cluster",
"param": {
"router_VIP": {
"name": "Instance VIP",
"value": "10.11.0.10 at ens3, priority 103 of router 50"
}
}
}
},
"description": "The main goal of this project is to provide simple and robust facilities for loadbalancing and high-availability to Linux system and Linux based infrastructures.",
"name": "Keepalived"
},
"heat": {
"role": {
"server": {
"endpoint": {
"heat_api_cfn": {
"type": "heat-api-cfn",
"protocol": "http",
"name": "heat-api-cfn",
"address": "http://10.11.0.11:8003"
},
"heat_api_cloudwatch": {
"type": "heat-api-cloudwatch",
"protocol": "http",
"name": "heat-api-cloudwatch",
"address": "http://10.11.0.11:8000"
},
"heat_api": {
"type": "heat-api",
"protocol": "http",
"name": "heat-api",
"address": "http://10.11.0.11:8004"
}
},
"name": "server",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"bind": {
"value": "10.11.0.11:8004"
},
"version": {
"name": "Version",
"value": "queens"
},
"identity_host": {
"name": "Identity service",
"value": "heat@10.11.0.10:35357"
},
"packages": {
"value": "* heat-api: 1:10.0.3-1.0~u16.04+mcp59\n* heat-api-cfn: 1:10.0.3-1.0~u16.04+mcp59\n* heat-engine: 1:10.0.3-1.0~u16.04+mcp59\n* heat-common: 1:10.0.3-1.0~u16.04+mcp59\n* python-heatclient: 1.14.0-1.0~u16.04+mcp6\n* gettext-base: 0.19.7-2ubuntu3.1"
},
"database_host": {
"name": "Database",
"value": "heat@10.11.0.50:3306//heat"
}
}
}
},
"description": "Heat implements an orchestration engine to launch multiple composite cloud applications based on templates in the form of text files that can be treated like code.",
"name": "Heat"
},
"nova": {
"role": {
"controller": {
"endpoint": {
"nova_ec2_api": {
"type": "nova-ec2-api",
"protocol": "http",
"name": "nova-ec2-api",
"address": "http://10.11.0.11:8773"
},
"nova_api": {
"type": "nova-api",
"protocol": "http",
"name": "nova-api",
"address": "http://10.11.0.11:8774"
},
"nova_metadata": {
"type": "nova-metadata",
"protocol": "http",
"name": "nova-metadata",
"address": "http://10.11.0.11:8775"
}
},
"name": "controller",
"param": {
"network_host": {
"name": "Network service",
"value": "10.11.0.10:9696"
},
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"networking": {
"name": "Networking plugin",
"value": "default"
},
"identity_host": {
"name": "Identity host ip",
"value": "nova@10.11.0.10:35357"
},
"dhcp_domain": {
"name": "DHCP domain",
"value": "novalocal"
},
"bind": {
"value": "10.11.0.11"
},
"workers": {
"name": "Number of osapi and conductor workers",
"value": 8
},
"vncproxy_url": {
"name": "VNC proxy URL",
"value": "https://10.13.250.9:6080"
},
"ram_allocation_ratio": {
"name": "RAM allocation ratio",
"value": 1.5
},
"glance_host": {
"name": "Glance service",
"value": "10.11.0.10:9292"
},
"version": {
"name": "Version",
"value": "queens"
},
"scheduler_default_filters": {
"name": "Scheduler default filters",
"value": "DifferentHostFilter SameHostFilter RetryFilter AvailabilityZoneFilter RamFilter CoreFilter DiskFilter ComputeFilter ComputeCapabilitiesFilter ImagePropertiesFilter ServerGroupAntiAffinityFilter ServerGroupAffinityFilter PciPassthroughFilter NUMATopologyFilter AggregateInstanceExtraSpecsFilter"
},
"disk_allocation_ratio": {
"name": "Disk allocation ratio",
"value": 1.0
},
"cpu_allocation_ratio": {
"name": "CPU allocation ratio",
"value": 16.0
},
"packages": {
"value": "* nova-common: 2:17.0.10-7~u16.01+mcp188\n* nova-consoleproxy: 2:17.0.10-7~u16.01+mcp188\n* novnc: 1:0.6.1-1.1~u16.04+mcp3\n* nova-api: 2:17.0.10-7~u16.01+mcp188\n* nova-conductor: 2:17.0.10-7~u16.01+mcp188\n* nova-consoleauth: 2:17.0.10-7~u16.01+mcp188\n* nova-doc: 2:17.0.9-6~u16.01+mcp189\n* nova-scheduler: 2:17.0.10-7~u16.01+mcp188\n* python-novaclient: 2:9.1.1-1~u16.04+mcp6\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-pycadf: 2.7.0-1~u16.04+mcp3"
},
"database_host": {
"name": "Database",
"value": "nova@10.11.0.50:3306/nova"
}
}
}
},
"description": "OpenStack Nova provides a cloud computing fabric controller, supporting a wide variety of virtualization technologies, including KVM, Xen, LXC, VMware, and more.",
"name": "Nova"
},
"glusterfs": {
"role": {
"client": {
"name": "client",
"param": {
"mounts": {
"value": {
"glance": "/var/lib/glance/images"
}
}
}
}
},
"description": "An open source, distributed file system capable of scaling to several petabytes and handling thousands of clients.",
"name": "GlusterFS"
},
"keystone": {
"role": {
"client": {
"name": "client",
"param": {}
},
"server": {
"endpoint": {
"keystone_api_admin": {
"type": "keystone-api-admin",
"protocol": "http",
"name": "keystone-api-admin",
"address": "http://10.11.0.11:35357"
},
"keystone_api_public": {
"type": "keystone-api-public",
"protocol": "http",
"name": "keystone-api-public",
"address": "http://10.11.0.11:5000"
}
},
"name": "server",
"param": {
"service_tenant": {
"value": "service"
},
"token_engine": {
"value": "fernet"
},
"bind": {
"value": "10.11.0.11:5000"
},
"region": {
"name": "Region",
"value": "RegionOne"
},
"private_bind": {
"value": "10.11.0.11:35357"
},
"version": {
"name": "Version",
"value": "queens"
},
"services": {
"value": ""
},
"packages": {
"value": "* keystone: 2:13.0.2-3~u16.04+mcp19\n* python-keystone: 2:13.0.2-3~u16.04+mcp19\n* python-keystoneclient: 1:3.15.0-1.0~u16.04+mcp12\n* python-psycopg2: 2.7.4-1.0~u16.04+mcp1\n* python-mysqldb: 1.3.7-1build2\n* mysql-client: 5.6.41-1~u16.04+mcp1\n* python-six: 1.10.0-3\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-openstackclient: 3.14.3-1.0~u16.04+mcp18\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-pycadf: 2.7.0-1~u16.04+mcp3"
},
"database_host": {
"name": "Database",
"value": "keystone@10.11.0.50:3306/keystone"
}
}
}
},
"description": "Keystone provides authentication, authorization and service discovery mechanisms via HTTP primarily for use by projects in the OpenStack family.",
"name": "Keystone"
},
"linux": {
"role": {
"network": {
"name": "Network",
"param": {
"ip": {
"name": "IP Addresses",
"value": [
"10.10.100.7",
"10.11.0.10",
"10.11.0.11",
"10.12.100.14",
"10.13.100.22",
"127.0.0.1"
]
},
"fqdn": {
"name": "FQDN",
"value": "ctl01.ozhurba-os-oc-cicd-sl.local"
}
}
},
"system": {
"name": "System",
"param": {
"kernel": {
"value": "Linux 4.15.0-43-generic"
},
"distribution": {
"value": "Ubuntu 16.04.5 LTS"
},
"name": {
"value": "ctl01"
}
}
}
},
"description": "Linux is a high performance, yet completely free, Unix-like operating system that is suitable for use on a wide range of computers and other products.",
"name": "Linux"
},
"apache": {
"role": {
"server": {
"endpoint": {
"keystone_keystone": {
"type": "keystone",
"protocol": "http",
"name": "keystone",
"address": "http://ctl01.ozhurba-os-oc-cicd-sl.local:80/"
}
},
"name": "server",
"param": {
"version": {
"value": "2.4.18 (Ubuntu)"
}
}
}
},
"description": "The Apache HTTP Server Project is a collaborative software development effort aimed at creating a robust, commercial-grade, featureful, and freely-available source code implementation of an HTTP (Web) server.",
"name": "Apache"
},
"cinder": {
"role": {
"volume": {
"name": "volume",
"param": {
"backends": {
"value": ""
},
"packages": {
"value": "* cinder-volume: dpkg-query: no packages found matching cinder-volume\n* lvm2: 2.02.133-1ubuntu10\n* sysfsutils: 2.1.0+repack-4\n* sg3-utils: dpkg-query: no packages found matching sg3-utils\n* python-cinder: 2:12.0.7-2~u16.04+mcp100\n* python-mysqldb: 1.3.7-1build2\n* p7zip: dpkg-query: no packages found matching p7zip\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-pycadf: 2.7.0-1~u16.04+mcp3\n"
}
}
},
"controller": {
"endpoint": {
"glance_api": {
"type": "cinder-api",
"protocol": "http",
"name": "cinder-api",
"address": "http://10.11.0.11:8776"
}
},
"name": "controller",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"version": {
"name": "Version",
"value": "queens"
},
"bind": {
"value": "10.11.0.11:8776"
},
"glance_host": {
"name": "Glance service",
"value": "10.11.0.10:9292"
},
"backends": {
"value": ""
},
"identity_host": {
"name": "Identity service",
"value": "cinder@10.11.0.10:35357"
},
"packages": {
"value": "* cinder-api: dpkg-query: no packages found matching cinder-api\n* cinder-scheduler: 2:12.0.7-2~u16.04+mcp100\n* lvm2: 2.02.133-1ubuntu10\n* python-cinder: 2:12.0.7-2~u16.04+mcp100\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-pycadf: 2.7.0-1~u16.04+mcp3\n"
},
"database_host": {
"name": "Database",
"value": "cinder@10.11.0.50:3306//cinder"
}
}
}
},
"description": "Cinder provides an infrastructure for managing volumes in OpenStack. It was originally a Nova component called nova-volume, but has become an independent project since the Folsom release.",
"name": "Cinder"
},
"glance": {
"role": {
"server": {
"endpoint": {
"glance_registry": {
"type": "glance-registry",
"protocol": "http",
"name": "glance-registry",
"address": "http://10.11.0.10:9191"
},
"glance_api": {
"type": "glance-api",
"protocol": "http",
"name": "glance-api",
"address": "http://10.11.0.11:9292"
}
},
"name": "server",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"bind": {
"value": "10.11.0.11:9292"
},
"workers": {
"name": "Number of workers",
"value": 8
},
"storage_engine": {
"name": "Glance storage engine",
"value": "file"
},
"version": {
"name": "Version",
"value": "queens"
},
"identity_host": {
"name": "Identity service",
"value": "glance@10.11.0.10:35357"
},
"packages": {
"value": "* glance: 2:16.0.1-2~u16.04+mcp23\n* glance-api: 2:16.0.1-2~u16.04+mcp23\n* glance-registry: 2:16.0.1-2~u16.04+mcp23\n* glance-common: 2:16.0.1-2~u16.04+mcp23\n* python-glance: 2:16.0.1-2~u16.04+mcp23\n* python-glance-store: 0.23.0-2~u16.04+mcp6\n* python-glanceclient: 1:2.10.1-1.0~u16.04+mcp4\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-memcache: 1.57+fixed-1~u16.04+mcp1\n* python-pycadf: 2.7.0-1~u16.04+mcp3"
},
"database_host": {
"name": "Database",
"value": "glance@10.11.0.50:3306//glance"
}
}
}
},
"description": "The Glance project provides services for discovering, registering, and retrieving virtual machine images.",
"name": "Glance"
},
"salt": {
"role": {
"minion": {
"name": "minion",
"param": {
"version": {
"value": "2017.7.8 (Nitrogen)"
}
}
}
},
"description": "Salt is a new approach to infrastructure management. Easy enough to get running in minutes, scalable enough to manage tens of thousands of servers, and fast enough to communicate with them in seconds.",
"name": "Salt"
},
"neutron": {
"role": {
"server": {
"endpoint": {
"neutron_server": {
"type": "neutron-server",
"protocol": "http",
"name": "neutron-server",
"address": "http://10.11.0.11:9696"
}
},
"name": "server",
"param": {
"message_queue_ip": {
"name": "Message queue",
"value": "openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.41:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.42:5672,openstack:vsCFaby8sPgbSu4YdtkaL912mkWAJw9b@10.11.0.43:5672//openstack"
},
"plugin": {
"value": "contrail"
},
"bind": {
"value": "10.11.0.11:9696"
},
"version": {
"name": "Version",
"value": "queens"
},
"identity_host": {
"name": "Identity service",
"value": "neutron@10.11.0.10:35357"
},
"compute_host": {
"name": "Compute service",
"value": "nova@10.11.0.10"
},
"packages": {
"value": "* neutron-server: 2:12.0.5-5~u16.04+mcp155\n* python-neutron-lbaas: 2:12.0.0-2~u16.04+mcp34\n* gettext-base: 0.19.7-2ubuntu3.1\n* python-pycadf: 2.7.0-1~u16.04+mcp3"
},
"database_host": {
"name": "Database",
"value": "neutron@10.11.0.50:3306//neutron"
}
}
}
},
"description": "Neutron is an OpenStack project to provide networking as a service between interface devices managed by other OpenStack services.",
"name": "Neutron"
}
}
},
"num_gpus": 1,
"roles": [
"fluentd.agent",
"telegraf.agent",
"glusterfs.client",
"rsyslog.client",
"linux.storage",
"linux.system",
"linux.network",
"glance.client",
"glance.server",
"keepalived.cluster",
"nova.controller",
"grafana.collector",
"keystone.client",
"keystone.server",
"memcached.server",
"openscap.service",
"neutron.client",
"neutron.server",
"ntp.client",
"nginx.server",
"heat.server",
"apache.server",
"haproxy.proxy",
"openssh.server",
"opencontrail.client",
"logrotate.server",
"prometheus.collector",
"cinder.controller",
"salt.minion"
],
"virtual": "kvm",
"os": "Ubuntu",
"disks": [
"loop1",
"dm-1",
"loop6",
"vdb",
"loop4",
"dm-4",
"loop2",
"dm-2",
"loop0",
"dm-0",
"loop7",
"loop5",
"dm-5",
"vda",
"loop3",
"dm-3"
],
"cpu_model": "Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz",
"osmajorrelease": 16,
"pythonexecutable": "/usr/bin/python",
"productname": "OpenStack Nova",
"osarch": "amd64",
"cpuarch": "x86_64",
"lsb_distrib_codename": "xenial",
"osrelease_info": [
16,
4
],
"locale_info": {
"detectedencoding": "UTF-8",
"defaultlanguage": "en_US",
"defaultencoding": "UTF-8"
},
"gpus": [
{
"model": "GD 5446",
"vendor": "unknown"
}
],
"prometheus": {
"server": {
"recording": {},
"target": {
"static": {
"fluentd": {
"relabel_configs": [
{
"regex": "10.11.0.11:24231",
"source_labels": "__address__",
"target_label": "host",
"replacement": "ctl01"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 24231,
"address": "10.11.0.11"
}
]
},
"telegraf": {
"relabel_configs": [
{
"regex": "10.11.0.11:9126",
"source_labels": "__address__",
"target_label": "host",
"replacement": "ctl01"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 9126,
"address": "10.11.0.11"
}
]
}
},
"dns": {
"endpoint": [
{
"domain": [
"tasks.monitoring_remote_agent"
],
"type": "A",
"name": "remote_agent_openstack",
"port": "9127"
}
],
"enabled": true
}
},
"alert": {
"GlareApiOutage": {
"labels": {
"severity": "critical",
"service": "glance"
},
"annotations": {
"description": "Glare API is not accessible for the Glare endpoint in the OpenStack service catalog.",
"summary": "Glare API outage"
},
"if": "openstack_api_check_status{name=\"glare\"} == 0"
},
"ApacheWorkersAbsent": {
"labels": {
"severity": "minor",
"service": "apache"
},
"annotations": {
"description": "The Apache service on the {{ $labels.host }} node has no available workers for 2 minutes.",
"summary": "Apache has no available idle workers"
},
"for": "2m",
"if": "apache_IdleWorkers == 0"
},
"KeepalivedProcessNotResponsive": {
"labels": {
"severity": "major",
"service": "keepalived"
},
"annotations": {
"description": "The Keepalived process on the {{ $labels.host }} node is not responding.",
"summary": "Keepalived process is not responding"
},
"if": "keepalived_up == 0"
},
"NetdevBudgetRanOutsWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The rate of net_rx_action loops terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters.",
"summary": "CPU terminated 0.1 net_rx_action loops per second"
},
"for": "7m",
"if": "max(rate(nstat_time_squeeze[5m])) without (cpu) > 0.1"
},
"HaproxyHTTPResponse5xxTooHigh": {
"labels": {
"severity": "warning",
"service": "haproxy"
},
"annotations": {
"description": "The average per-second rate of 5xx HTTP errors on the {{ $labels.host }} node for the {{ $labels.proxy }} back end is {{ $value }} (as measured over the last 2 minutes).",
"summary": "HTTP 5xx responses on the {{ $labels.proxy }} back end"
},
"if": "rate(haproxy_http_response_5xx{sv=\"FRONTEND\"}[2m]) > 1"
},
"NovaServicesDownMinor": {
"labels": {
"severity": "minor",
"service": "nova"
},
"annotations": {
"description": "More than 30% of {{ $labels.binary }} services are down.",
"summary": "30% of {{ $labels.binary }} services are down"
},
"if": "count(openstack_nova_service_state{binary!~\"nova-compute\"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~\"nova-compute\"}) by (binary) * 0.3"
},
"HeatApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "heat"
},
"annotations": {
"description": "All available {{ $labels.name }} endpoints are not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"heat.*-api\"} == 0) by (name) == count(http_response_status{name=~\"heat.*-api\"}) by (name)"
},
"NovaServiceDown": {
"labels": {
"severity": "minor",
"service": "nova"
},
"annotations": {
"description": "The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down.",
"summary": "{{ $labels.binary }} service is down"
},
"if": "openstack_nova_service_state == 0"
},
"GlanceApiEndpointsDownMajor": {
"labels": {
"severity": "major",
"service": "glance"
},
"annotations": {
"description": "{{ $value }} {{ $labels.name }} endpoints (>= 50.0%) are not accessible for 2 minutes.",
"summary": "50.0% of {{ $labels.name }} endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"glance.*\"} == 0) by (name) >= count(http_response_status{name=~\"glance.*\"}) by (name) * 0.5"
},
"SystemDiskFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 85.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 85.0"
},
"SystemMemoryFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "90.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 90.0"
},
"HaproxyServiceDown": {
"labels": {
"severity": "minor",
"service": "haproxy"
},
"annotations": {
"description": "The HAProxy service on the {{ $labels.host }} node is down.",
"summary": "HAProxy service is down"
},
"if": "haproxy_up != 1"
},
"ApacheServiceDown": {
"labels": {
"severity": "minor",
"service": "apache"
},
"annotations": {
"description": "The Apache service on the {{ $labels.host }} node is down.",
"summary": "Apache service is down"
},
"if": "apache_up != 1"
},
"HaproxyBackendDown": {
"labels": {
"severity": "minor",
"service": "haproxy"
},
"annotations": {
"description": "The {{ $labels.proxy }} back end on the {{ $labels.host }} node is down.",
"summary": "{{ $labels.proxy }} back end is down"
},
"if": "increase(haproxy_chkdown{sv=\"BACKEND\"}[1m]) > 0"
},
"CinderApiEndpointsDownMajor": {
"labels": {
"severity": "major",
"service": "cinder"
},
"annotations": {
"description": "{{ $value }} cinder-api endpoints (>= 50.0%) are not accessible for 2 minutes.",
"summary": "50.0% of cinder-api endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"cinder-api\"} == 0) >= count(http_response_status{name=~\"cinder-api\"}) * 0.5"
},
"HeatApiEndpointsDownMajor": {
"labels": {
"severity": "major",
"service": "heat"
},
"annotations": {
"description": "{{ $value }} {{ $labels.name }} endpoints (>= 50.0%) are not accessible for 2 minutes.",
"summary": "50.0% of {{ $labels.name }} endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"heat.*-api\"} == 0) by (name) >= count(http_response_status{name=~\"heat.*-api\"}) by (name) * 0.5"
},
"CinderApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "cinder"
},
"annotations": {
"description": "All available cinder-api endpoints are not accessible for 2 minutes.",
"summary": "Cinder-api endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"cinder-api\"} == 0) == count(http_response_status{name=~\"cinder-api\"})"
},
"HaproxyServiceOutage": {
"labels": {
"severity": "critical",
"service": "haproxy"
},
"annotations": {
"description": "All HAProxy services within the {{ $labels.cluster }} are down.",
"summary": "HAProxy service outage"
},
"if": "count(label_replace(haproxy_up, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\") != 1) by (cluster) == count(label_replace(haproxy_up, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster)"
},
"HeatErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "heat"
},
"annotations": {
"description": "The average per-second rate of errors in Heat logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Heat logs"
},
"if": "sum(rate(log_messages{service=\"heat\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"NovaApiOutage": {
"labels": {
"severity": "critical",
"service": "nova"
},
"annotations": {
"description": "Nova API is not accessible for all available Nova endpoints in the OpenStack service catalog.",
"summary": "Nova API outage"
},
"if": "max(openstack_api_check_status{name=~\"nova.*|placement\"}) == 0"
},
"CinderApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "cinder"
},
"annotations": {
"description": "The cinder-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "Cinder-api endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"cinder-api\"} == 0"
},
"NovaComputeServicesDownMajor": {
"labels": {
"severity": "major",
"service": "nova"
},
"annotations": {
"description": "More than 50% of nova-compute services are down.",
"summary": "More than 50% of nova-compute services are down"
},
"if": "count(openstack_nova_service_state{binary=\"nova-compute\"} == 0) >= count(openstack_nova_service_state{binary=\"nova-compute\"}) * 0.5"
},
"PacketsDroppedByCpuWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 10 minutes.",
"summary": "Increased number of CPU dropped packets"
},
"if": "floor(increase(nstat_packet_drop[10m])) > 0"
},
"KeepalivedUnknownState": {
"labels": {
"severity": "minor",
"service": "keepalived"
},
"annotations": {
"description": "The Keepalived VRRP {{ $labels.name }} is in the UNKNOWN state on the {{ $labels.host }} node.",
"summary": "Keepalived VRRP state is UNKNOWN"
},
"if": "keepalived_state == -1"
},
"HaproxyBackendOutage": {
"labels": {
"severity": "critical",
"service": "haproxy"
},
"annotations": {
"description": "All {{ $labels.proxy }} back ends are down.",
"summary": "{{ $labels.proxy }} back end outage"
},
"if": "max(haproxy_active_servers{sv=\"BACKEND\"}) by (proxy) + max(haproxy_backup_servers{sv=\"BACKEND\"}) by (proxy) == 0"
},
"KeystoneApiResponseTimeTooHigh": {
"labels": {
"severity": "warning",
"service": "keystone"
},
"annotations": {
"description": "The Keystone API response time for GET and POST requests on the {{ $labels.host }} node is higher than 3.0s for 2 minutes.",
"summary": "High response time of Keystone API"
},
"for": "2m",
"if": "max by(host) (openstack_http_response_times{service='keystone',quantile=\"0.9\",http_method=~\"^(GET|POST)$\",http_status=~\"^2..$\"}) >= 3.0"
},
"KeystoneErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "keystone"
},
"annotations": {
"description": "The average per-second rate of errors in Keystone logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Keystone logs"
},
"if": "sum(rate(log_messages{service=\"keystone\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"KeepalivedMultipleIPAddr": {
"labels": {
"severity": "major",
"service": "keepalived"
},
"annotations": {
"description": "The Keepalived {{ $labels.ip }} virtual IP is assigned more than once.",
"summary": "Keepalived VIP is assigned more than once"
},
"for": "2m",
"if": "count(ipcheck_assigned) by (ip) > 1"
},
"SystemLoadTooHighWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is 1.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 1.0"
},
"NovaApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "nova"
},
"annotations": {
"description": "All available nova-api endpoints are not accessible for 2 minutes.",
"summary": "nova-api endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"nova-api\"} == 0) == count(http_response_status{name=~\"nova-api\"})"
},
"SystemDiskInodesFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "85.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 85.0"
},
"NovaComputeServicesDownMinor": {
"labels": {
"severity": "minor",
"service": "nova"
},
"annotations": {
"description": "More than 25% of nova-compute services are down.",
"summary": "More than 25% of nova-compute services are down"
},
"if": "count(openstack_nova_service_state{binary=\"nova-compute\"} == 0) >= count(openstack_nova_service_state{binary=\"nova-compute\"}) * 0.25"
},
"SshFailedLoginsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} failed SSH login attempts on the {{ $labels.host }} node during the last 5 minutes.",
"summary": "5 failed SSH logins"
},
"if": "increase(failed_logins_total[5m]) > 5"
},
"NovaApiDown": {
"labels": {
"severity": "major",
"service": "nova"
},
"annotations": {
"description": "Nova API is not accessible for the {{ $labels.name }} endpoint.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"if": "openstack_api_check_status{name=~\"nova.*|placement\"} == 0"
},
"MemcachedConnectionsNoneMinor": {
"labels": {
"severity": "minor",
"service": "memcached"
},
"annotations": {
"description": "The Memcached database on the {{ $labels.host }} node has no open connections.",
"summary": "Memcached has no open connections"
},
"if": "memcached_curr_connections == 0"
},
"NeutronApiOutage": {
"labels": {
"severity": "critical",
"service": "neutron"
},
"annotations": {
"description": "Neutron API is not accessible for the Neutron endpoint in the OpenStack service catalog.",
"summary": "Neutron API outage"
},
"if": "openstack_api_check_status{name=\"neutron\"} == 0"
},
"SystemMemoryFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "95.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 95.0"
},
"SystemCpuFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The average CPU usage on the {{ $labels.host }} node is {{ $value }}% for 2 minutes.",
"summary": "90.0% CPU usage"
},
"for": "2m",
"if": "100 - avg_over_time(cpu_usage_idle{cpu=\"cpu-total\"}[5m]) > 90.0"
},
"CinderErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "cinder"
},
"annotations": {
"description": "The average per-second rate of errors in Cinder logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Cinder logs"
},
"if": "sum(rate(log_messages{service=\"cinder\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"KeystoneApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "keystone"
},
"annotations": {
"description": "All available {{ $labels.name }} endpoints are not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"keystone.*\"} == 0) by (name) == count(http_response_status{name=~\"keystone.*\"}) by (name)"
},
"NeutronApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "neutron"
},
"annotations": {
"description": "The neutron-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "neutron-api endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=\"neutron-api\"} == 0"
},
"MemcachedConnectionsNoneMajor": {
"labels": {
"severity": "major",
"service": "memcached"
},
"annotations": {
"description": "The Memcached database has no open connections on all nodes.",
"summary": "Memcached has no open connections on all nodes"
},
"if": "count(memcached_curr_connections == 0) == count(memcached_up)"
},
"CinderServicesDownMinor": {
"labels": {
"severity": "minor",
"service": "cinder"
},
"annotations": {
"description": "{{ $value }} {{ $labels.binary }} services (>=30.0%) are down.",
"summary": "30.0% of {{ $labels.binary }} services are down"
},
"if": "count(openstack_cinder_service_state == 0) by (binary) >= on (binary) count(openstack_cinder_service_state) by (binary) * 0.3"
},
"KeepalivedFailedState": {
"labels": {
"severity": "minor",
"service": "keepalived"
},
"annotations": {
"description": "The Keepalived VRRP {{ $labels.name }} is in the FAILED state on the {{ $labels.host }} node.",
"summary": "Keepalived VRRP state is FAILED"
},
"if": "keepalived_state == 0"
},
"NeutronApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "neutron"
},
"annotations": {
"description": "All available neutron-api endpoints are not accessible for 2 minutes.",
"summary": "neutron-api endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=\"neutron-api\"} == 0) == count(http_response_status{name=\"neutron-api\"})"
},
"NovaServicesDownMajor": {
"labels": {
"severity": "major",
"service": "nova"
},
"annotations": {
"description": "More than 60% of {{ $labels.binary }} services are down.",
"summary": "More than 60% of {{ $labels.binary }} services are down"
},
"if": "count(openstack_nova_service_state{binary!~\"nova-compute\"} == 0) by (binary) >= on (binary) count(openstack_nova_service_state{binary!~\"nova-compute\"}) by (binary) * 0.6"
},
"CertificateExpirationWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.source }} certificate on the {{ $labels.host }} node expires in less than 60 days.",
"summary": "The certificate expires in less than 60 days"
},
"if": "x509_cert_expiry / (24 * 60 * 60) < 60"
},
"GlanceApiEndpointsOutage": {
"labels": {
"severity": "critical",
"service": "glance"
},
"annotations": {
"description": "All available {{ $labels.name }} endpoints are not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoints outage"
},
"for": "2m",
"if": "count(http_response_status{name=~\"glance.*\"} == 0) by (name) == count(http_response_status{name=~\"glance.*\"}) by (name)"
},
"SaltMinionServiceDown": {
"labels": {
"severity": "critical",
"service": "salt"
},
"annotations": {
"description": "The salt-minion service on the {{ $labels.host }} node is down.",
"summary": "Salt-minion service is down"
},
"if": "procstat_running{process_name=\"salt-minion\"} == 0"
},
"CinderServiceDown": {
"labels": {
"severity": "minor",
"service": "cinder"
},
"annotations": {
"description": "The {{ $labels.binary }} service on the {{ $labels.hostname }} node is down.",
"summary": "{{ $labels.binary }} service is down"
},
"if": "openstack_cinder_service_state == 0"
},
"NovaApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "nova"
},
"annotations": {
"description": "The nova-api endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "nova-api endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"nova-api\"} == 0"
},
"SystemLoadTooHighCritical": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is2.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 2.0"
},
"HaproxyServiceDownMajor": {
"labels": {
"severity": "major",
"service": "haproxy"
},
"annotations": {
"description": "{{ $value }} HAProxy services (>= 50%) within the {{ $labels.cluster }} cluster are down.",
"summary": "50% of HAProxy services are down"
},
"if": "count(label_replace(haproxy_up, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\") != 1) by (cluster) >= 0.5 * count(label_replace(haproxy_up, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster)"
},
"PacketsDroppedByCpuMinor": {
"labels": {
"severity": "minor",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 10 minutes.",
"summary": "CPU dropped more than 100 packets"
},
"if": "floor(increase(nstat_packet_drop[10m])) > 100"
},
"CronProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The cron process on the {{ $labels.host }} node is down.",
"summary": "Cron process is down"
},
"if": "procstat_running{process_name=\"cron\"} == 0"
},
"SshdProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The SSH process on the {{ $labels.host }} node is down.",
"summary": "SSH process is down"
},
"if": "procstat_running{process_name=\"sshd\"} == 0"
},
"CinderServiceOutage": {
"labels": {
"severity": "critical",
"service": "cinder"
},
"annotations": {
"description": "All {{ $labels.binary }} services are down.",
"summary": "{{ $labels.binary }} service outage"
},
"if": "count(openstack_cinder_service_state == 0) by (binary) == on (binary) count(openstack_cinder_service_state) by (binary)"
},
"SystemDiskInodesFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "95.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 95.0"
},
"NovaErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "nova"
},
"annotations": {
"description": "The average rate of errors in Nova logs on the {{ $labels.host }} node is more than 0.2 error messages per second (as measured over the last 5 minutes).",
"summary": "High number of errors in Nova logs"
},
"if": "sum(rate(log_messages{service=\"nova\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"KeystoneApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "keystone"
},
"annotations": {
"description": "The {{ $labels.name }} endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"keystone.*\"} == 0"
},
"SystemDiskFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 95.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 95.0"
},
"MemcachedConnectionThrottled": {
"labels": {
"severity": "warning",
"service": "memcached"
},
"annotations": {
"description": "An average of {{ $value }} client connections to the Memcached database on the {{ $labels.host }} node throttle for 2 minutes.",
"summary": " 5 throttled Memcached connections"
},
"for": "2m",
"if": "increase(memcached_conn_yields[1m]) > 5"
},
"KeystoneApiOutage": {
"labels": {
"severity": "critical",
"service": "keystone"
},
"annotations": {
"description": "Keystone API is not accessible for the Keystone endpoint in the OpenStack service catalog.",
"summary": "Keystone API outage"
},
"if": "openstack_api_check_status{name=~\"keystone.*\"} == 0"
},
"GlanceApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "glance"
},
"annotations": {
"description": "The {{ $labels.name }} endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"glance.*\"} == 0"
},
"HeatApiEndpointDown": {
"labels": {
"severity": "minor",
"service": "heat"
},
"annotations": {
"description": "The {{ $labels.name }} endpoint on the {{ $labels.host }} node is not accessible for 2 minutes.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"for": "2m",
"if": "http_response_status{name=~\"heat.*-api\"} == 0"
},
"GlanceErrorLogsTooHigh": {
"labels": {
"severity": "warning",
"service": "glance"
},
"annotations": {
"description": "The average per-second rate of errors in Glance logs on the {{ $labels.host }} node is {{ $value }} (as measured over the last 5 minutes).",
"summary": "High number of errors in Glance logs"
},
"if": "sum(rate(log_messages{service=\"glance\",level=~\"(?i:(error|emergency|fatal))\"}[5m])) without (level) > 0.2"
},
"NeutronApiEndpointsDownMajor": {
"labels": {
"severity": "major",
"service": "neutron"
},
"annotations": {
"description": "{{ $value }} neutron-api endpoints (>= 50.0%) are not accessible for 2 minutes.",
"summary": "50.0% of neutron-api endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=\"neutron-api\"} == 0) >= count(http_response_status{name=\"neutron-api\"}) * 0.5"
},
"KeepalivedServiceOutage": {
"labels": {
"severity": "critical",
"service": "keepalived"
},
"annotations": {
"description": "All Keepalived processes within the {{ $labels.cluster }} cluster are down.",
"summary": "Keepalived service outage"
},
"if": "count(label_replace(procstat_running{process_name=\"keepalived\"}, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster) == count(label_replace(procstat_running{process_name=\"keepalived\"} == 0, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster)"
},
"MemcachedEvictionsLimit": {
"labels": {
"severity": "warning",
"service": "memcached"
},
"annotations": {
"description": "An average of {{ $value }} evictions in the Memcached database occurred on the {{ $labels.host }} node during the last minute.",
"summary": "10 Memcached evictions"
},
"if": "increase(memcached_evictions[1m]) > 10"
},
"NovaApiEndpointsDownMajor": {
"labels": {
"severity": "major",
"service": "nova"
},
"annotations": {
"description": "More than 60% of nova-api endpoints are not accessible for 2 minutes.",
"summary": "60% of nova-api endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"nova-api\"} == 0) >= count(http_response_status{name=~\"nova-api\"}) * 0.6"
},
"HeatApiDown": {
"labels": {
"severity": "major",
"service": "heat"
},
"annotations": {
"description": "Heat API is not accessible for the {{ $labels.name }} endpoint.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"if": "openstack_api_check_status{name=~\"heat.*\"} == 0"
},
"NovaServiceOutage": {
"labels": {
"severity": "critical",
"service": "nova"
},
"annotations": {
"description": "All {{ $labels.binary }} services are down.",
"summary": "{{ $labels.binary }} service outage"
},
"if": "count(openstack_nova_service_state == 0) by (binary) == on (binary) count(openstack_nova_service_state) by (binary)"
},
"SystemDiskErrorsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.device }} disk on the {{ $labels.host }} node is reporting errors for 5 minutes.",
"summary": "Disk {{ $labels.device }} is failing"
},
"for": "5m",
"if": "increase(hdd_errors_total[1m]) > 0"
},
"CinderApiDown": {
"labels": {
"severity": "major",
"service": "cinder"
},
"annotations": {
"description": "Cinder API is not accessible for the {{ $labels.name }} endpoint.",
"summary": "{{ $labels.name }} endpoint is not accessible"
},
"if": "openstack_api_check_status{name=~\"cinder.*\"} == 0"
},
"CinderServicesDownMajor": {
"labels": {
"severity": "major",
"service": "cinder"
},
"annotations": {
"description": "{{ $value }} {{ $labels.binary }} services (>=60.0%) are down.",
"summary": "60.0% of {{ $labels.binary }} services are down"
},
"if": "count(openstack_cinder_service_state == 0) by (binary) >= on (binary) count(openstack_cinder_service_state) by (binary) * 0.6"
},
"MemcachedServiceDown": {
"labels": {
"severity": "minor",
"service": "memcached"
},
"annotations": {
"description": "The Memcached service on the {{ $labels.host }} node is down.",
"summary": "Memcached service is down"
},
"if": "memcached_up == 0"
},
"NtpOffsetTooHigh": {
"labels": {
"severity": "warning",
"service": "ntp"
},
"annotations": {
"description": "The NTP offset on the {{ $labels.host }} node is {{ $value }}ms for 2 minutes.",
"summary": "NTP offset reached the limit of 200ms"
},
"for": "2m",
"if": "ntpq_offset >= 200"
},
"GlanceApiOutage": {
"labels": {
"severity": "critical",
"service": "glance"
},
"annotations": {
"description": "Glance API is not accessible for the Glance endpoint in the OpenStack service catalog.",
"summary": "Glance API outage"
},
"if": "openstack_api_check_status{name=\"glance\"} == 0"
},
"SystemRxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets received by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "60 received packets were dropped"
},
"if": "increase(net_drop_in[1m]) > 60 unless on (host,interface) bond_slave_active == 0"
},
"KeystoneApiEndpointssDownMajor": {
"labels": {
"severity": "major",
"service": "keystone"
},
"annotations": {
"description": "{{ $value }} {{ $labels.name }} endpoints (>= 50.0%) are not accessible for 2 minutes.",
"summary": "50.0% of {{ $labels.name }} endpoints are not accessible"
},
"for": "2m",
"if": "count(http_response_status{name=~\"keystone.*\"} == 0) by (name) >= count(http_response_status{name=~\"keystone.*\"}) by (name) * 0.5"
},
"SystemTxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets transmitted by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "100 transmitted packets were dropped"
},
"if": "increase(net_drop_out[1m]) > 100"
},
"ApacheServiceOutage": {
"labels": {
"severity": "critical",
"service": "apache"
},
"annotations": {
"description": "The Apache services within the {{ $labels.cluster }} cluster are down.",
"summary": "Apache service outage"
},
"if": "count(label_replace(apache_up, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster) == count(label_replace(apache_up == 0, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster)"
},
"HaproxyBackendDownMajor": {
"labels": {
"severity": "major",
"service": "haproxy"
},
"annotations": {
"description": "{{ $value }} {{ $labels.proxy }} back ends (>= 50%) are down.",
"summary": "50% of {{ $labels.proxy }} back ends are down"
},
"if": "0.5 * avg(sum(haproxy_active_servers{type=\"server\"}) by (host, proxy) + sum(haproxy_backup_servers{type=\"server\"}) by (host, proxy)) by (proxy) >= avg(sum(haproxy_active_servers{type=\"backend\"}) by (host, proxy) + sum(haproxy_backup_servers{type=\"backend\"}) by (host, proxy)) by (proxy)"
},
"HeatApiOutage": {
"labels": {
"severity": "critical",
"service": "heat"
},
"annotations": {
"description": "Heat API is not accessible for all available Heat endpoints in the OpenStack service catalog.",
"summary": "Heat API outage"
},
"if": "max(openstack_api_check_status{name=~\"heat.*\"}) == 0"
},
"KeepalivedProcessDown": {
"labels": {
"severity": "major",
"service": "keepalived"
},
"annotations": {
"description": "The Keepalived process on the {{ $labels.host }} node is down.",
"summary": "Keepalived process is down"
},
"if": "procstat_running{process_name=\"keepalived\"} == 0"
},
"CertificateExpirationCritical": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.source }} certificate on the {{ $labels.host }} node expires in less than 30 days.",
"summary": "The certificate expires in less than 30 days"
},
"if": "x509_cert_expiry / (24 * 60 * 60) < 30"
},
"CinderApiOutage": {
"labels": {
"severity": "critical",
"service": "cinder"
},
"annotations": {
"description": "Cinder API is not accessible for all available Cinder endpoints in the OpenStack service catalog.",
"summary": "Cinder API outage"
},
"if": "max(openstack_api_check_status{name=~\"cinder.*\"}) == 0"
}
}
}
},
"glance_policy": {
"get_task": "",
"modify_metadef_namespace": "",
"add_metadef_resource_type_association": "",
"get_metadef_object": "",
"upload_image": "",
"get_image_location": "",
"list_metadef_resource_types": "",
"add_image": "",
"get_metadef_namespace": "",
"manage_image_cache": "role:admin",
"delete_member": "",
"deactivate": "",
"add_metadef_object": "",
"get_images": "",
"get_metadef_resource_type": "",
"delete_image": "",
"get_metadef_namespaces": "",
"get_metadef_objects": "",
"modify_metadef_property": "",
"publicize_image": "role:admin",
"add_metadef_namespace": "",
"get_member": "",
"add_member": "",
"set_image_location": "",
"communitize_image": "",
"tasks_api_access": "role:admin",
"modify_task": "",
"add_task": "",
"modify_member": "",
"context_is_admin": "role:admin",
"modify_image": "",
"add_metadef_property": "",
"get_metadef_properties": "",
"get_members": "",
"get_tasks": "",
"get_metadef_tag": "",
"reactivate": "",
"modify_metadef_tag": "",
"get_metadef_property": "",
"delete_image_location": "",
"copy_from": "",
"add_metadef_tags": "",
"default": "role:admin",
"download_image": "",
"modify_metadef_object": "",
"get_image": "",
"add_metadef_tag": "",
"get_metadef_tags": ""
},
"path": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"machine_id": "7b17cfb3a5724e06a2b1b8d17cc0e2cb",
"salt": {
"graph": [
{
"host": "ctl01.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "linux.system",
"relations": [
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//glusterfs-3.8//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//opencontrail-4.0//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-updates main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
}
]
},
{
"host": "ctl01.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "ntp.client",
"relations": [
{
"host_external": "udp://10.10.0.15",
"direction": "source",
"type": "udp",
"service": "other-service"
},
{
"host_external": "udp://pool.ntp.org",
"direction": "source",
"type": "udp",
"service": "other-service"
}
]
},
{
"host": "ctl01.ozhurba-os-oc-cicd-sl.local",
"type": "software-config",
"service": "salt.minion",
"relations": [
{
"direction": "source",
"type": "tcp-0mq",
"service": "salt.master",
"host_from_target": "10.10.0.15"
}
]
}
]
}
},
"cfg01.fakedomain.local": {
"biosversion": "1.10.2-1.1~u16.04+mcp2",
"kernel": "Linux",
"domain": "ozhurba-os-oc-cicd-sl.local",
"uid": 0,
"zmqversion": "4.1.4",
"kernelrelease": "4.15.0-43-generic",
"pythonpath": [
"/usr/bin",
"/usr/lib/python2.7",
"/usr/lib/python2.7/plat-x86_64-linux-gnu",
"/usr/lib/python2.7/lib-tk",
"/usr/lib/python2.7/lib-old",
"/usr/lib/python2.7/lib-dynload",
"/usr/local/lib/python2.7/dist-packages",
"/usr/local/lib/python2.7/dist-packages/configparser-3.7.4-py2.7.egg",
"/usr/lib/python2.7/dist-packages"
],
"serialnumber": "157967c6-9d9f-49dc-9f27-73d403afd492",
"pid": 19577,
"telegraf": {
"remote_agent": {
"input": {},
"processor": {},
"dir": {
"config": "/srv/volumes/local/telegraf",
"config_d": "/srv/volumes/local/telegraf/telegraf.d"
},
"output": {}
},
"agent": {
"metric_batch_size": 1000,
"collection_jitter": 2,
"interval": 15,
"enabled": true,
"pkgs": [
"telegraf"
],
"round_interval": false,
"output": {
"prometheus_client": {
"engine": "prometheus",
"bind": {
"port": 9126,
"address": "0.0.0.0"
},
"string_as_label": false
}
},
"input": {
"kernel": null,
"processes": null,
"nstat": {
"fieldpass": [
"packet_drop",
"time_squeeze"
]
},
"x509": {
"sources": [
"/srv/salt/pki/ozhurba-os-oc-cicd-sl/10.13.250.9.crt"
]
},
"ntp": {
"template": "ntp/files/telegraf.conf"
},
"system": null,
"http_listener": {
"read_timeout": "10s",
"bind": {
"port": 8186,
"address": "127.0.0.1"
},
"tagexclude": [
"hostname"
],
"write_timeout": "10s"
},
"cpu": {
"totalcpu": true,
"percpu": false
},
"linux_sysctl_fs": null,
"diskio": null,
"procstat": {
"process": {
"sshd": {
"exe": "sshd"
},
"salt-minion": {
"pattern": "salt-minion"
},
"cron": {
"exe": "cron"
},
"dockerd": {
"exe": "dockerd"
},
"salt-master": {
"pattern": "salt-master"
},
"ntpd": {
"exe": "ntpd"
}
}
},
"net": null,
"disk": {
"ignore_fs": [
"aufs",
"rootfs",
"sysfs",
"proc",
"devtmpfs",
"devpts",
"tmpfs",
"fusectl",
"cgroup",
"overlay"
]
},
"mem": null,
"docker": {
"endpoint": "unix:///var/run/docker.sock",
"container_name_exclude": [
"*"
],
"timeout": 5,
"namepass": [
"docker",
"docker_swarm"
],
"perdevice": true,
"gather_services": true,
"total": false
},
"swap": null
},
"metric_buffer_limit": 10000,
"processor": {},
"dir": {
"config": "/etc/telegraf",
"config_d": "/etc/telegraf/telegraf.d"
}
}
},
"ip_interfaces": {
"ens4": [
"10.11.0.15",
"fe80::f816:3eff:fe8d:eff3"
],
"ens5": [
"10.12.100.8",
"fe80::f816:3eff:fe39:9558"
],
"ens6": [
"10.13.0.15",
"fe80::f816:3eff:fea6:7cae"
],
"vethaca0b7a": [
"fe80::38b7:15ff:fecc:9e2a"
],
"lo": [
"127.0.0.1",
"::1"
],
"ens3": [
"10.10.0.15",
"fe80::f816:3eff:fe79:d748"
],
"docker_gwbridge": [
"10.20.0.1",
"fe80::42:beff:fe69:f35d"
],
"veth31bea8f": [
"fe80::40ac:75ff:fe71:f26f"
]
},
"groupname": "root",
"fqdn_ip6": [],
"mem_total": 16040,
"saltversioninfo": [
2017,
7,
8,
0
],
"SSDs": [],
"mdadm": [],
"id": "cfg01.ozhurba-os-oc-cicd-sl.local",
"manufacturer": "OpenStack Foundation",
"osrelease": "16.04",
"ps": "ps -efHww",
"systemd": {
"version": "229",
"features": "+PAM +AUDIT +SELINUX +IMA +APPARMOR +SMACK +SYSVINIT +UTMP +LIBCRYPTSETUP +GCRYPT +GNUTLS +ACL +XZ -LZ4 +SECCOMP +BLKID +ELFUTILS +KMOD -IDN"
},
"fqdn": "cfg01.ozhurba-os-oc-cicd-sl.local",
"uuid": "6cf16038-e8ef-45da-8f9d-13f0ca824a31",
"ip6_interfaces": {
"ens4": [
"fe80::f816:3eff:fe8d:eff3"
],
"ens5": [
"fe80::f816:3eff:fe39:9558"
],
"ens6": [
"fe80::f816:3eff:fea6:7cae"
],
"vethaca0b7a": [
"fe80::38b7:15ff:fecc:9e2a"
],
"lo": [
"::1"
],
"ens3": [
"fe80::f816:3eff:fe79:d748"
],
"docker_gwbridge": [
"fe80::42:beff:fe69:f35d"
],
"veth31bea8f": [
"fe80::40ac:75ff:fe71:f26f"
]
},
"num_cpus": 4,
"hwaddr_interfaces": {
"ens4": "fa:16:3e:8d:ef:f3",
"ens5": "fa:16:3e:39:95:58",
"ens6": "fa:16:3e:a6:7c:ae",
"vethaca0b7a": "3a:b7:15:cc:9e:2a",
"lo": "00:00:00:00:00:00",
"ens3": "fa:16:3e:79:d7:48",
"docker_gwbridge": "02:42:be:69:f3:5d",
"veth31bea8f": "42:ac:75:71:f2:6f"
},
"init": "systemd",
"ip4_interfaces": {
"ens4": [
"10.11.0.15"
],
"ens5": [
"10.12.100.8"
],
"ens6": [
"10.13.0.15"
],
"vethaca0b7a": [],
"lo": [
"127.0.0.1"
],
"ens3": [
"10.10.0.15"
],
"docker_gwbridge": [
"10.20.0.1"
],
"veth31bea8f": []
},
"grafana": {
"dashboard": {
"linux_disk_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_disk_prometheus.json",
"format": "json"
},
"linux_influxdb": {
"datasource": "influxdb",
"template": "linux/files/grafana_dashboards/system_influxdb.json",
"format": "json"
},
"glusterfs_prometheus": {
"datasource": "prometheus",
"template": "glusterfs/files/grafana_dashboards/glusterfs_prometheus.json",
"format": "json"
},
"jenkins_prometheus": {
"datasource": "prometheus",
"template": "jenkins/files/grafana_dashboards/jenkins_prometheus.json",
"format": "json"
},
"main_prometheus": {
"datasource": "prometheus",
"row": {
"ost-middleware": {
"title": "Middleware",
"panel": {
"glusterfs": {
"target": {
"cluster_status": {
"expr": "avg(glusterfs_up) by (name)"
}
},
"links": [
{
"type": "dashboard",
"dashboard": "GlusterFS",
"title": "GlusterFS"
}
],
"title": "GlusterFS"
}
}
}
}
},
"glusterfs_influxdb": {
"datasource": "influxdb",
"template": "glusterfs/files/grafana_dashboards/glusterfs_influxdb.json",
"format": "json"
},
"linux_overview_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_overview_prometheus.json",
"format": "json"
},
"docker_prometheus": {
"datasource": "prometheus",
"template": "docker/files/grafana_dashboards/docker_prometheus.json",
"format": "json"
},
"ntp_prometheus": {
"datasource": "prometheus",
"template": "ntp/files/grafana_dashboards/ntp_prometheus.json",
"format": "json"
},
"main": {
"datasource": "influxdb",
"row": {
"docker-data-plane": {
"title": "Docker",
"panel": {
"docker": {
"target": {
"cluster_status": {
"query": "SELECT last(value) FROM cluster_status WHERE cluster_name = 'docker' AND environment_label = '$environment' AND $timeFilter GROUP BY time($interval) fill(null)",
"rawQuery": true
}
},
"links": [
{
"type": "dashboard",
"dashboard": "Docker",
"title": "Docker"
}
],
"title": "Docker"
}
}
}
}
},
"linux_network_prometheus": {
"datasource": "prometheus",
"template": "linux/files/grafana_dashboards/system_network_prometheus.json",
"format": "json"
},
"docker_influxdb": {
"datasource": "influxdb",
"template": "docker/files/grafana_dashboards/docker_influxdb.json",
"format": "json"
}
}
},
"ssh_fingerprints": {
"rsa": "76:1d:3d:51:56:9b:6c:2f:47:2a:e0:46:28:ce:f5:86",
"ecdsa": "e6:d4:2f:25:fa:5b:dd:35:00:9e:5c:65:d5:91:ac:8c",
"dsa": "53:2c:67:0f:03:57:ae:26:7d:65:de:bf:8c:41:1e:05"
},
"gid": 0,
"master": "10.10.0.15",
"ipv4": [
"10.10.0.15",
"10.11.0.15",
"10.12.100.8",
"10.13.0.15",
"10.20.0.1",
"127.0.0.1"
],
"dns": {
"domain": "",
"sortlist": [],
"nameservers": [
"172.18.176.6",
"172.17.44.91"
],
"ip4_nameservers": [
"172.18.176.6",
"172.17.44.91"
],
"search": [
"openstacklocal"
],
"ip6_nameservers": [],
"options": []
},
"ipv6": [
"::1",
"fe80::42:beff:fe69:f35d",
"fe80::38b7:15ff:fecc:9e2a",
"fe80::40ac:75ff:fe71:f26f",
"fe80::f816:3eff:fe39:9558",
"fe80::f816:3eff:fe79:d748",
"fe80::f816:3eff:fe8d:eff3",
"fe80::f816:3eff:fea6:7cae"
],
"server_id": 1095353950,
"cpu_flags": [
"fpu",
"vme",
"de",
"pse",
"tsc",
"msr",
"pae",
"mce",
"cx8",
"apic",
"sep",
"mtrr",
"pge",
"mca",
"cmov",
"pat",
"pse36",
"clflush",
"mmx",
"fxsr",
"sse",
"sse2",
"ss",
"syscall",
"nx",
"pdpe1gb",
"rdtscp",
"lm",
"constant_tsc",
"arch_perfmon",
"rep_good",
"nopl",
"xtopology",
"cpuid",
"pni",
"pclmulqdq",
"vmx",
"ssse3",
"fma",
"cx16",
"pcid",
"sse4_1",
"sse4_2",
"x2apic",
"movbe",
"popcnt",
"tsc_deadline_timer",
"aes",
"xsave",
"avx",
"f16c",
"rdrand",
"hypervisor",
"lahf_lm",
"abm",
"3dnowprefetch",
"cpuid_fault",
"invpcid_single",
"pti",
"ssbd",
"ibrs",
"ibpb",
"tpr_shadow",
"vnmi",
"flexpriority",
"ept",
"vpid",
"fsgsbase",
"tsc_adjust",
"bmi1",
"hle",
"avx2",
"smep",
"bmi2",
"erms",
"invpcid",
"rtm",
"rdseed",
"adx",
"smap",
"xsaveopt",
"arat"
],
"osfullname": "Ubuntu",
"localhost": "cfg01",
"lsb_distrib_id": "Ubuntu",
"username": "root",
"fqdn_ip4": [
"10.11.0.15"
],
"shell": "/bin/sh",
"nodename": "cfg01",
"saltversion": "2017.7.8",
"lsb_distrib_release": "16.04",
"saltpath": "/usr/lib/python2.7/dist-packages/salt",
"pythonversion": [
2,
7,
12,
"final",
0
],
"host": "cfg01",
"os_family": "Debian",
"oscodename": "xenial",
"services": [
"fluentd",
"telegraf",
"runtest",
"glusterfs",
"rsyslog",
"linux",
"glance",
"git",
"reclass",
"nova",
"grafana",
"keystone",
"jenkins",
"openscap",
"neutron",
"ntp",
"maas",
"nginx",
"prometheus",
"postgresql",
"openssh",
"logrotate",
"_reclass_",
"docker",
"salt"
],
"osfinger": "Ubuntu-16.04",
"biosreleasedate": "04/01/2014",
"dns_records": [
{
"names": [
"cfg01.ozhurba-os-oc-cicd-sl.local",
"cfg01"
],
"address": "10.11.0.15"
}
],
"lsb_distrib_description": "Ubuntu 16.04.5 LTS",
"sphinx": {
"doc": {
"reclass": {
"role": {
"storage": {
"name": "storage",
"param": {
"version": {
"value": "1.5.6"
}
}
}
},
"description": "reclass is an external node classifier (ENC) used with automation tools, such as Puppet, Salt, and Ansible.",
"name": "Reclass"
},
"maas": {
"role": {
"server": {
"name": "server",
"param": {
"some_param": {
"name": "Some name",
"value": "some value"
}
}
}
},
"description": "Some service info",
"name": "maas"
},
"glusterfs": {
"role": {
"client": {
"name": "client",
"param": {
"mounts": {
"value": {
"salt_pki": "/srv/salt/pki"
}
}
}
}
},
"description": "An open source, distributed file system capable of scaling to several petabytes and handling thousands of clients.",
"name": "GlusterFS"
},
"linux": {
"role": {
"network": {
"name": "Network",
"param": {
"ip": {
"name": "IP Addresses",
"value": [
"10.10.0.15",
"10.11.0.15",
"10.12.100.8",
"10.13.0.15",
"10.20.0.1",
"127.0.0.1"
]
},
"fqdn": {
"name": "FQDN",
"value": "cfg01.ozhurba-os-oc-cicd-sl.local"
}
}
},
"system": {
"name": "System",
"param": {
"kernel": {
"value": "Linux 4.15.0-43-generic"
},
"distribution": {
"value": "Ubuntu 16.04.5 LTS"
},
"name": {
"value": "cfg01"
}
}
}
},
"description": "Linux is a high performance, yet completely free, Unix-like operating system that is suitable for use on a wide range of computers and other products.",
"name": "Linux"
},
"jenkins": {
"role": {
"client": {
"name": "client",
"param": {
"master": {
"value": "10.10.0.15:8081"
},
"jobs": {
"value": [
"cvp-func",
"validate_openstack",
"cvp-ha",
"cvp-stacklight",
"cvp-perf",
"deploy_openstack",
"cvp-spt",
"cvp-sanity",
"deploy-openstack-compute"
]
}
}
},
"master": {
"name": "master",
"param": {}
}
},
"description": "Jenkins is an application that monitors executions of repeated jobs, such as building a software project or jobs run by cron.",
"name": "Jenkins"
},
"docker": {
"role": {
"swarm": {
"name": "swarm",
"param": {
"role": {
"value": "master"
},
"advertise_addr": {
"value": "10.11.0.15"
}
}
},
"host": {
"name": "host",
"param": {
"version": {
"value": "Docker version 18.09.0, build 4d60db4"
}
}
},
"client": {
"name": "client",
"param": {
"source": {
"value": {
"engine": "pkg"
}
},
"stacks": {
"value": [
"jenkins-master (image docker-prod-local.docker.mirantis.net/mirantis/cicd/jenkins:2019.2.0)",
"jenkins-slave01 (image docker-prod-local.docker.mirantis.net/mirantis/cicd/jnlp-slave:2019.2.0)",
"postgresql-postgresql-db (image docker-prod-local.docker.mirantis.net/mirantis/external/library/postgres:9.6.10)"
]
}
}
}
},
"description": "Docker is an open source project to pack, ship and run any application as a lightweight container.",
"name": "Docker"
},
"salt": {
"role": {
"minion": {
"name": "minion",
"param": {
"version": {
"value": "2017.7.8 (Nitrogen)"
}
}
},
"master": {
"name": "master",
"param": {
"version": {
"value": "2017.7.8 (Nitrogen)"
}
}
}
},
"description": "Salt is a new approach to infrastructure management. Easy enough to get running in minutes, scalable enough to manage tens of thousands of servers, and fast enough to communicate with them in seconds.",
"name": "Salt"
}
}
},
"num_gpus": 1,
"roles": [
"fluentd.agent",
"telegraf.agent",
"runtest.artifact_collector",
"runtest.tempest",
"runtest.salttest",
"glusterfs.client",
"rsyslog.client",
"linux.storage",
"linux.system",
"linux.network",
"glance.client",
"git.client",
"reclass.storage",
"nova.client",
"grafana.collector",
"keystone.client",
"jenkins.client",
"openscap.service",
"neutron.client",
"ntp.client",
"maas.cluster",
"maas.region",
"nginx.server",
"prometheus.collector",
"postgresql.server",
"openssh.server",
"logrotate.server",
"docker.host",
"docker.client",
"salt.minion",
"salt.api",
"salt.master"
],
"virtual": "kvm",
"os": "Ubuntu",
"disks": [
"loop1",
"loop6",
"vdb",
"loop4",
"loop2",
"loop0",
"loop7",
"loop5",
"vda",
"loop3"
],
"cpu_model": "Intel(R) Xeon(R) CPU E5-2650 v4 @ 2.20GHz",
"osmajorrelease": 16,
"pythonexecutable": "/usr/bin/python",
"productname": "OpenStack Nova",
"osarch": "amd64",
"cpuarch": "x86_64",
"lsb_distrib_codename": "xenial",
"osrelease_info": [
16,
4
],
"locale_info": {
"detectedencoding": "UTF-8",
"defaultlanguage": "en_US",
"defaultencoding": "UTF-8"
},
"gpus": [
{
"model": "GD 5446",
"vendor": "unknown"
}
],
"prometheus": {
"server": {
"recording": {},
"target": {
"static": {
"fluentd": {
"relabel_configs": [
{
"regex": "10.11.0.15:24231",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cfg01"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 24231,
"address": "10.11.0.15"
}
]
},
"telegraf": {
"relabel_configs": [
{
"regex": "10.11.0.15:9126",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cfg01"
}
],
"honor_labels": true,
"endpoint": [
{
"port": 9126,
"address": "10.11.0.15"
}
]
},
"jenkins": {
"relabel_configs": [
{
"regex": "10.10.0.15:8081",
"source_labels": "__address__",
"target_label": "host",
"replacement": "cfg01"
}
],
"endpoint": [
{
"port": 8081,
"address": "10.10.0.15"
}
],
"metrics_path": "/prometheus/"
}
}
},
"alert": {
"SystemCpuFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The average CPU usage on the {{ $labels.host }} node is {{ $value }}% for 2 minutes.",
"summary": "90.0% CPU usage"
},
"for": "2m",
"if": "100 - avg_over_time(cpu_usage_idle{cpu=\"cpu-total\"}[5m]) > 90.0"
},
"DockerServiceJenkinsMasterOutage": {
"labels": {
"severity": "critical",
"service": "docker"
},
"annotations": {
"description": "All Docker Swarm 'jenkins_master' replicas are down for 2 minutes.",
"summary": "Docker Swarm 'jenkins_master' service outage"
},
"for": "2m",
"if": "docker_swarm_tasks_running{service_name=\"jenkins_master\"} == 0 or absent(docker_swarm_tasks_running{service_name=\"jenkins_master\"}) == 1"
},
"NetdevBudgetRanOutsWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The rate of net_rx_action loops terminations on the {{ $labels.host }} node is {{ $value }} per second during the last 7 minutes. Modify the net.core.netdev_budget and net.core.netdev_budget_usecs kernel parameters.",
"summary": "CPU terminated 0.1 net_rx_action loops per second"
},
"for": "7m",
"if": "max(rate(nstat_time_squeeze[5m])) without (cpu) > 0.1"
},
"PacketsDroppedByCpuWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 10 minutes.",
"summary": "Increased number of CPU dropped packets"
},
"if": "floor(increase(nstat_packet_drop[10m])) > 0"
},
"SystemRxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets received by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "60 received packets were dropped"
},
"if": "increase(net_drop_in[1m]) > 60 unless on (host,interface) bond_slave_active == 0"
},
"DockerServiceJenkinsSlave01Outage": {
"labels": {
"severity": "critical",
"service": "docker"
},
"annotations": {
"description": "All Docker Swarm 'jenkins_slave01' replicas are down for 2 minutes.",
"summary": "Docker Swarm 'jenkins_slave01' service outage"
},
"for": "2m",
"if": "docker_swarm_tasks_running{service_name=\"jenkins_slave01\"} == 0 or absent(docker_swarm_tasks_running{service_name=\"jenkins_slave01\"}) == 1"
},
"CertificateExpirationWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.source }} certificate on the {{ $labels.host }} node expires in less than 60 days.",
"summary": "The certificate expires in less than 60 days"
},
"if": "x509_cert_expiry / (24 * 60 * 60) < 60"
},
"DockerServiceOutage": {
"labels": {
"severity": "critical",
"service": "docker"
},
"annotations": {
"description": "All dockerd processes within the {{ $labels.cluster }} cluster are down.",
"summary": "Docker cluster outage"
},
"if": "count(label_replace(procstat_running{process_name=\"dockerd\"}, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster) == count(label_replace(procstat_running{process_name=\"dockerd\"} == 0, \"cluster\", \"$1\", \"host\", \"([^0-9]+).+\")) by (cluster)"
},
"SystemDiskErrorsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.device }} disk on the {{ $labels.host }} node is reporting errors for 5 minutes.",
"summary": "Disk {{ $labels.device }} is failing"
},
"for": "5m",
"if": "increase(hdd_errors_total[1m]) > 0"
},
"SystemLoadTooHighWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is1.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 1.0"
},
"SaltMinionServiceDown": {
"labels": {
"severity": "critical",
"service": "salt"
},
"annotations": {
"description": "The salt-minion service on the {{ $labels.host }} node is down.",
"summary": "Salt-minion service is down"
},
"if": "procstat_running{process_name=\"salt-minion\"} == 0"
},
"SystemLoadTooHighCritical": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The system load per CPU on the {{ $labels.host }} node is {{ $value }} for 5 minutes.",
"summary": "System load is2.0"
},
"for": "5m",
"if": "system_load5 / system_n_cpus > 2.0"
},
"SystemDiskInodesFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "85.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 85.0"
},
"NtpOffsetTooHigh": {
"labels": {
"severity": "warning",
"service": "ntp"
},
"annotations": {
"description": "The NTP offset on the {{ $labels.host }} node is {{ $value }}ms for 2 minutes.",
"summary": "NTP offset reached the limit of 200ms"
},
"for": "2m",
"if": "ntpq_offset >= 200"
},
"PacketsDroppedByCpuMinor": {
"labels": {
"severity": "minor",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.cpu }} CPU on the {{ $labels.host }} node dropped {{ $value }} packets during the last 10 minutes.",
"summary": "CPU dropped more than 100 packets"
},
"if": "floor(increase(nstat_packet_drop[10m])) > 100"
},
"CronProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The cron process on the {{ $labels.host }} node is down.",
"summary": "Cron process is down"
},
"if": "procstat_running{process_name=\"cron\"} == 0"
},
"SshdProcessDown": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The SSH process on the {{ $labels.host }} node is down.",
"summary": "SSH process is down"
},
"if": "procstat_running{process_name=\"sshd\"} == 0"
},
"SystemDiskFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 85.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 85.0"
},
"SshFailedLoginsTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} failed SSH login attempts on the {{ $labels.host }} node during the last 5 minutes.",
"summary": "5 failed SSH logins"
},
"if": "increase(failed_logins_total[5m]) > 5"
},
"DockerdProcessDown": {
"labels": {
"severity": "minor",
"service": "docker"
},
"annotations": {
"description": "The dockerd process on the {{ $labels.host }} node is down.",
"summary": "Dockerd process is down"
},
"if": "procstat_running{process_name=\"dockerd\"} == 0"
},
"SystemDiskInodesFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of disk inodes in the {{ $labels.path }} volume for 2 minutes.",
"summary": "95.0% of inodes for {{ $labels.path }} are used"
},
"for": "2m",
"if": "100 * disk_inodes_used / disk_inodes_total >= 95.0"
},
"SystemMemoryFullWarning": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "90.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 90.0"
},
"SystemTxPacketsDroppedTooHigh": {
"labels": {
"severity": "warning",
"service": "system"
},
"annotations": {
"description": "{{ $value }} packets transmitted by the {{ $labels.interface }} interface on the {{ $labels.host }} node were dropped during the last minute.",
"summary": "100 transmitted packets were dropped"
},
"if": "increase(net_drop_out[1m]) > 100"
},
"SystemDiskFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The disk partition ({{ $labels.path }}) on the {{ $labels.host }} node is {{ $value }}% full for 2 minutes.",
"summary": "Disk partition {{ $labels.path }} is 95.0% full"
},
"for": "2m",
"if": "disk_used_percent >= 95.0"
},
"DockerServicePostgresqlPostgresqldbOutage": {
"labels": {
"severity": "critical",
"service": "docker"
},
"annotations": {
"description": "All Docker Swarm 'postgresql_postgresql-db' replicas are down for 2 minutes.",
"summary": "Docker Swarm 'postgresql_postgresql-db' service outage"
},
"for": "2m",
"if": "docker_swarm_tasks_running{service_name=\"postgresql_postgresql-db\"} == 0 or absent(docker_swarm_tasks_running{service_name=\"postgresql_postgresql-db\"}) == 1"
},
"SaltMasterServiceDown": {
"labels": {
"severity": "critical",
"service": "salt"
},
"annotations": {
"description": "The salt-master service on the {{ $labels.host }} node is down.",
"summary": "Salt-master service is down"
},
"if": "procstat_running{process_name=\"salt-master\"} == 0"
},
"CertificateExpirationCritical": {
"labels": {
"severity": "critical",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.source }} certificate on the {{ $labels.host }} node expires in less than 30 days.",
"summary": "The certificate expires in less than 30 days"
},
"if": "x509_cert_expiry / (24 * 60 * 60) < 30"
},
"SystemMemoryFullMajor": {
"labels": {
"severity": "major",
"service": "system"
},
"annotations": {
"description": "The {{ $labels.host }} node uses {{ $value }}% of memory for 2 minutes.",
"summary": "95.0% of memory is used"
},
"for": "2m",
"if": "mem_used_percent >= 95.0"
}
}
}
},
"path": "/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin",
"machine_id": "e1d7cc5551ee409b895ce38bb098ed07",
"salt": {
"graph": [
{
"host": "cfg01.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "linux.system",
"relations": [
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//salt-formulas//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//td-agent//xenial xenial contrib",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//saltstack-2017.7//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//maas//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//percona//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//glusterfs-3.8//xenial/ xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//docker//xenial/ xenial stable",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//docker//xenial/ xenial stable",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/2019.2.0//extra//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-updates main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/2019.2.0//ubuntu/ xenial-security main restricted universe",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb [arch=amd64] http://mirror.mirantis.com/update/2019.2.0//salt-formulas//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
},
{
"host_external": "deb http://mirror.mirantis.com/update/2019.2.0//openstack-queens//xenial xenial main",
"direction": "source",
"type": "tcp-http",
"service": "apt.repo"
}
]
},
{
"host": "cfg01.ozhurba-os-oc-cicd-sl.local",
"type": "software-config",
"service": "reclass.storage",
"relations": null
},
{
"host": "cfg01.ozhurba-os-oc-cicd-sl.local",
"type": "software-system",
"service": "ntp.client",
"relations": [
{
"host_external": "udp://ntp.cesnet.cz",
"direction": "source",
"type": "udp",
"service": "other-service"
},
{
"host_external": "udp://pool.ntp.org",
"direction": "source",
"type": "udp",
"service": "other-service"
}
]
},
{
"host": "cfg01.ozhurba-os-oc-cicd-sl.local",
"type": "software-config",
"service": "salt.minion",
"relations": [
{
"direction": "source",
"type": "tcp-0mq",
"service": "salt.master",
"host_from_target": "10.10.0.15"
}
]
},
{
"host": "cfg01.ozhurba-os-oc-cicd-sl.local",
"type": "software-config",
"service": "salt.master",
"relations": null
}
]
},
"backupninja": {
"client": {
"addresses": [
"10.11.0.15"
]
},
"backup": {
"jenkins": {
"fs_includes": [
"/var/lib/jenkins"
],
"fs_excludes": []
},
"salt": {
"fs_includes": [
"/srv/salt/reclass",
"/etc/salt/pki",
"/etc/pki/ca"
],
"fs_excludes": []
},
"maas": {
"fs_includes": [
"/etc/maas",
"/var/lib/maas",
"/var/backups/postgresql"
],
"fs_excludes": [
"/var/lib/maas/boot-resources"
]
}
}
}
}
}
]
}