| {% from "nova/map.jinja" import controller with context %} |
| # apache_wsgi is truthy only when controller.get('enabled') is truthy and |
| # controller.version is none of juno, kilo, liberty, mitaka or newton. |
| # It gates the nova_placement_wsgi decoder and nova_placement_wsgi_log input. |
| {%- set apache_wsgi = controller.get('enabled') and controller.version not in ('juno', 'kilo', 'liberty', 'mitaka', 'newton') %} |
| # Log collector: decoders used to parse Nova-related log streams. |
| log_collector: |
| decoder: |
| # Sandbox decoder for Nova service logs, backed by openstack_log.lua. |
| nova: |
| engine: sandbox |
| module_file: /usr/share/lma_collector/decoders/openstack_log.lua |
| module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules |
| adjust_timezone: true |
| # The libvirt decoder is declared only when pillar.nova.compute is defined. |
| {%- if pillar.nova.compute is defined %} |
| libvirt: |
| engine: sandbox |
| module_file: /usr/share/lma_collector/decoders/libvirt_log.lua |
| module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules |
| {%- endif %} |
| # The placement WSGI decoder is declared only when apache_wsgi is truthy. |
| # It is the only decoder here that carries a config section (logger name |
| # and the Apache access-log pattern). |
| {%- if apache_wsgi %} |
| nova_placement_wsgi: |
| engine: sandbox |
| module_file: /usr/share/lma_collector/decoders/apache_wsgi_log.lua |
| module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules |
| config: |
| logger: openstack.nova |
| apache_log_pattern: >- |
| %v:%p %h %l %u %t \"%r\" %>s %D %O \"%{Referer}i\" \"%{User-Agent}i\" |
| {%- endif %} |
| # Token splitter for Nova logs using a newline delimiter. |
| # NOTE(review): presumably referenced as "nova_splitter" by the nova_log input — confirm. |
| splitter: |
| nova: |
| engine: token |
| delimiter: '\n' |
| # Logstreamer inputs: which files are read and which decoder/splitter they use. |
| input: |
| # Nova service logs under /var/log; file_match captures the named groups |
| # Service and Seq, which differentiator and priority refer to. |
| nova_log: |
| engine: logstreamer |
| log_directory: "/var/log" |
| file_match: 'nova/(?P<Service>.+)\.log\.?(?P<Seq>\d*)$' |
| differentiator: ['nova', '_', 'Service'] |
| priority: ["^Seq"] |
| decoder: "nova_decoder" |
| splitter: "nova_splitter" |
| # The libvirtd log input is declared only when pillar.nova.compute is defined. |
| {%- if pillar.nova.compute is defined %} |
| libvirt_log: |
| engine: logstreamer |
| log_directory: "/var/log" |
| file_match: 'libvirt/libvirtd.log' |
| differentiator: ['libvirt'] |
| decoder: "libvirt_decoder" |
| splitter: "TokenSplitter" |
| {%- endif %} |
| # The placement API access-log input is declared only when apache_wsgi is truthy. |
| # NOTE(review): priority references ^Seq but this file_match defines no Seq |
| # capture group — confirm whether the priority line is intended here. |
| {%- if apache_wsgi %} |
| nova_placement_wsgi_log: |
| engine: logstreamer |
| log_directory: "/var/log/apache2" |
| file_match: 'nova_placement_access\.log' |
| differentiator: ['nova-placement-wsgi'] |
| priority: ["^Seq"] |
| decoder: "nova_placement_wsgi_decoder" |
| splitter: "TokenSplitter" |
| {%- endif %} |
| # Metric collector: triggers for Nova logs, the local API check and libvirt. |
| metric_collector: |
| trigger: |
| # Warning trigger on the log_messages metric filtered to service=nova, level=error. |
| # NOTE(review): no_data_policy presumably controls the state when no datapoint arrives — confirm. |
| nova_logs_error: |
| description: 'Too many errors have been detected in Nova logs' |
| severity: warning |
| no_data_policy: okay |
| rules: |
| - metric: log_messages |
| field: |
| service: nova |
| level: error |
| relational_operator: '>' |
| threshold: 0.1 |
| window: 70 |
| periods: 0 |
| function: max |
| # Declared only when pillar.nova.controller is defined: matches when the |
| # openstack_check_local_api metric for nova-api equals 0. |
| {%- if pillar.nova.controller is defined %} |
| nova_api_local_endpoint: |
| description: 'Nova API is locally down' |
| severity: down |
| rules: |
| - metric: openstack_check_local_api |
| field: |
| service: nova-api |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| {%- endif %} |
| # Declared only when pillar.nova.compute is defined: matches when the |
| # libvirt_check metric equals 0. |
| {%- if pillar.nova.compute is defined %} |
| libvirt_check: |
| description: 'Libvirt cannot be checked' |
| severity: down |
| rules: |
| - metric: libvirt_check |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| {%- endif %} |
| # Alarms binding the metric collector triggers to a service dimension. |
| # The nova_logs_error trigger is reused by the compute and controller log alarms. |
| alarm: |
| # Compute-role alarms (only when pillar.nova.compute is defined). |
| {%- if pillar.nova.compute is defined %} |
| nova_logs_compute: |
| alerting: enabled |
| triggers: |
| - nova_logs_error |
| dimension: |
| service: nova-logs-compute |
| libvirt_check: |
| alerting: enabled |
| triggers: |
| - libvirt_check |
| dimension: |
| service: libvirt-check |
| {%- endif %} |
| # Controller-role alarms (only when pillar.nova.controller is defined). |
| {%- if pillar.nova.controller is defined %} |
| nova_logs: |
| alerting: enabled |
| triggers: |
| - nova_logs_error |
| dimension: |
| service: nova-logs |
| nova_api_endpoint: |
| alerting: enabled |
| triggers: |
| - nova_api_local_endpoint |
| dimension: |
| service: nova-api-endpoint |
| {%- endif %} |
| # Remote collector triggers; everything up to the matching endif is declared |
| # only when pillar.nova.controller is defined. |
| remote_collector: |
| trigger: |
| {%- if pillar.nova.controller is defined %} |
| # Matches when the openstack_check_api metric for nova-api equals 0. |
| nova_api_check_failed: |
| description: 'Endpoint check for nova-api is failed' |
| severity: down |
| rules: |
| - metric: openstack_check_api |
| field: |
| service: nova-api |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| # For each of cert, consoleauth, conductor and scheduler, three triggers are |
| # generated from the openstack_nova_services metrics: |
| #   <service>_two_up  (warning):  up count >= 2 and down count > 0 |
| #   <service>_one_up  (critical): up count == 1 and up percent < 100 |
| #   <service>_zero_up (down):     up count == 0 |
| {%- for nova_service in ('cert', 'consoleauth', 'conductor', 'scheduler') %} |
| nova_{{ nova_service }}_two_up: |
| description: 'Some Nova {{ nova_service }}s are down' |
| severity: warning |
| logical_operator: and |
| rules: |
| - metric: openstack_nova_services |
| field: |
| service: {{ nova_service }} |
| state: up |
| relational_operator: '>=' |
| threshold: 2 |
| window: 60 |
| periods: 0 |
| function: last |
| - metric: openstack_nova_services |
| field: |
| service: {{ nova_service }} |
| state: down |
| relational_operator: '>' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| nova_{{ nova_service }}_one_up: |
| description: 'Only one Nova {{ nova_service }} is up' |
| severity: critical |
| logical_operator: and |
| rules: |
| - metric: openstack_nova_services |
| field: |
| service: {{ nova_service }} |
| state: up |
| relational_operator: '==' |
| threshold: 1 |
| window: 60 |
| periods: 0 |
| function: last |
| - metric: openstack_nova_services_percent |
| field: |
| service: {{ nova_service }} |
| state: up |
| relational_operator: '<' |
| threshold: 100 |
| window: 60 |
| periods: 0 |
| function: last |
| nova_{{ nova_service }}_zero_up: |
| description: 'All Nova {{ nova_service }}s are down or disabled' |
| severity: down |
| rules: |
| - metric: openstack_nova_services |
| field: |
| service: {{ nova_service }} |
| state: up |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| {%- endfor %} |
| # We treat the "up" and "disabled" states in the same way, considering |
| # that "disabled" should not be treated as an error. |
| # The three compute triggers below all read the percentage of nova-compute |
| # services in state "down": > 0 (warning), > 50 (critical), == 100 (down). |
| # NOTE(review): logical_operator is set on nova_compute_some_down although |
| # it has a single rule — confirm whether it is needed. |
| nova_compute_some_down: |
| description: 'Some Nova computes are down' |
| severity: warning |
| logical_operator: and |
| rules: |
| - metric: openstack_nova_services_percent |
| field: |
| service: compute |
| state: down |
| relational_operator: '>' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: last |
| nova_compute_majority_down: |
| description: 'Majority of Nova computes are down' |
| severity: critical |
| rules: |
| - metric: openstack_nova_services_percent |
| field: |
| service: compute |
| state: down |
| relational_operator: '>' |
| threshold: 50 |
| window: 60 |
| periods: 0 |
| function: last |
| nova_compute_all_down: |
| description: 'All Nova computes are down' |
| severity: down |
| rules: |
| - metric: openstack_nova_services_percent |
| field: |
| service: compute |
| state: down |
| relational_operator: '==' |
| threshold: 100 |
| window: 60 |
| periods: 0 |
| function: last |
| # Warning when no free VCPU is left for new instances. The metric is compared |
| # against 0 with '==' so the rule matches its description, in the same way |
| # nova_total_free_memory_warning checks openstack_nova_total_free_ram. |
| nova_total_free_vcpu_warning: |
| description: 'There is no VCPU available for new instances' |
| severity: warning |
| rules: |
| - metric: openstack_nova_total_free_vcpus |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: max |
| # Warning when the openstack_nova_total_free_ram metric equals 0. |
| nova_total_free_memory_warning: |
| description: 'There is no memory available for new instances' |
| severity: warning |
| rules: |
| - metric: openstack_nova_total_free_ram |
| relational_operator: '==' |
| threshold: 0 |
| window: 60 |
| periods: 0 |
| function: max |
| # Aggregate free-RAM triggers, both grouped by aggregate: the free RAM |
| # percentage below 10.0 is a warning, below 1.0 is critical. |
| nova_aggregates_free_memory_warning: |
| description: 'The nova aggregates free memory percent is low' |
| severity: warning |
| no_data_policy: okay |
| rules: |
| - metric: openstack_nova_aggregate_free_ram_percent |
| group_by: [aggregate] |
| relational_operator: '<' |
| threshold: 10.0 |
| window: 70 |
| periods: 0 |
| function: min |
| nova_aggregates_free_memory_critical: |
| description: 'The nova aggregates free memory percent is too low' |
| severity: critical |
| no_data_policy: okay |
| rules: |
| - metric: openstack_nova_aggregate_free_ram_percent |
| group_by: [aggregate] |
| relational_operator: '<' |
| threshold: 1.0 |
| window: 70 |
| periods: 0 |
| function: min |
| {%- endif %} |
| # Remote collector alarms, declared only when pillar.nova.controller is |
| # defined. Each alarm lists its triggers and the service dimension it emits. |
| alarm: |
| {%- if pillar.nova.controller is defined %} |
| nova_api_check: |
| alerting: enabled |
| triggers: |
| - nova_api_check_failed |
| dimension: |
| service: nova-api-check |
| # One alarm per control service, listing its zero_up, one_up and two_up triggers. |
| {%- for nova_service in ('cert', 'consoleauth', 'conductor', 'scheduler') %} |
| nova_{{ nova_service }}: |
| alerting: enabled |
| triggers: |
| - nova_{{ nova_service }}_zero_up |
| - nova_{{ nova_service }}_one_up |
| - nova_{{ nova_service }}_two_up |
| dimension: |
| service: nova-{{ nova_service }} |
| {%- endfor %} |
| nova_total_free_vcpu: |
| alerting: enabled |
| triggers: |
| - nova_total_free_vcpu_warning |
| dimension: |
| service: nova-total-free-vcpu |
| nova_total_free_memory: |
| alerting: enabled |
| triggers: |
| - nova_total_free_memory_warning |
| dimension: |
| service: nova-total-free-memory |
| nova_aggregates_free_memory: |
| alerting: enabled |
| triggers: |
| - nova_aggregates_free_memory_critical |
| - nova_aggregates_free_memory_warning |
| dimension: |
| service: nova-aggregates-free-memory |
| nova_compute: |
| alerting: enabled |
| triggers: |
| - nova_compute_all_down |
| - nova_compute_majority_down |
| - nova_compute_some_down |
| dimension: |
| service: nova-compute |
| {%- endif %} |
| # Aggregator: alarm clusters. Each cluster matches alarms by service |
| # dimension, lists its members, and emits a nova-data or nova-control |
| # dimension under the 01-service-clusters nagios host. |
| aggregator: |
| alarm_cluster: |
| # Compute-side clusters, grouped by hostname. |
| nova_logs_compute: |
| policy: majority_of_node_members |
| group_by: hostname |
| alerting: enabled |
| match: |
| service: nova-logs-compute |
| members: |
| - nova_logs_compute |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| nova_libvirt: |
| policy: majority_of_node_members |
| group_by: hostname |
| alerting: enabled |
| match: |
| service: libvirt-check |
| members: |
| - libvirt_check |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| # Control-side clusters, grouped by hostname. |
| nova_logs: |
| policy: status_of_members |
| group_by: hostname |
| alerting: enabled |
| match: |
| service: nova-logs |
| members: |
| - nova_logs |
| dimension: |
| service: nova-control |
| nagios_host: 01-service-clusters |
| nova_api_endpoint: |
| policy: availability_of_members |
| group_by: hostname |
| alerting: enabled |
| match: |
| service: nova-api-endpoint |
| members: |
| - nova_api_endpoint |
| dimension: |
| service: nova-control |
| nagios_host: 01-service-clusters |
| # Unlike the clusters above, this one sets no group_by. |
| nova_api_check: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-api-check |
| members: |
| - nova_api_check |
| dimension: |
| service: nova-control |
| nagios_host: 01-service-clusters |
| # One highest_severity cluster per control service (cert, consoleauth, |
| # conductor, scheduler), each emitting the nova-control service dimension. |
| {%- for nova_service in ('cert', 'consoleauth', 'conductor', 'scheduler') %} |
| nova_{{ nova_service }}: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-{{ nova_service }} |
| members: |
| - nova_{{ nova_service }} |
| dimension: |
| service: nova-control |
| nagios_host: 01-service-clusters |
| {%- endfor %} |
| # Resource and compute clusters; all use highest_severity and emit the |
| # nova-data service dimension. |
| nova_total_free_vcpu: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-total-free-vcpu |
| members: |
| - nova_total_free_vcpu |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| nova_total_free_memory: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-total-free-memory |
| members: |
| - nova_total_free_memory |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| nova_aggregates_free_memory: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-aggregates-free-memory |
| members: |
| - nova_aggregates_free_memory |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| nova_compute: |
| policy: highest_severity |
| alerting: enabled |
| match: |
| service: nova-compute |
| members: |
| - nova_compute |
| dimension: |
| service: nova-data |
| nagios_host: 01-service-clusters |
| # Top-level clusters (nagios host 00-top-clusters, alerting with notification). |
| # nova_control matches the nova-control service dimension emitted by the |
| # control-side clusters. |
| # NOTE(review): the haproxy_nova-control member is not declared in this |
| # file; presumably it comes from another formula's metadata — confirm. |
| nova_control: |
| policy: highest_severity |
| alerting: enabled_with_notification |
| match: |
| service: nova-control |
| members: |
| - nova_logs |
| - nova_api_endpoint |
| - nova_api_check |
| {%- for nova_service in ('cert', 'consoleauth', 'conductor', 'scheduler') %} |
| - nova_{{ nova_service }} |
| {%- endfor %} |
| - haproxy_nova-control |
| dimension: |
| cluster_name: nova-control |
| nagios_host: 00-top-clusters |
| # nova_data matches the nova-data service dimension emitted by the |
| # compute-side and resource clusters. |
| nova_data: |
| policy: highest_severity |
| alerting: enabled_with_notification |
| match: |
| service: nova-data |
| members: |
| - nova_logs_compute |
| - nova_libvirt |
| - nova_total_free_vcpu |
| - nova_total_free_memory |
| - nova_aggregates_free_memory |
| - nova_compute |
| dimension: |
| cluster_name: nova-data |
| nagios_host: 00-top-clusters |