# RabbitMQ monitoring configuration, in three top-level sections:
#   log_collector    - decoder, input and splitter for the RabbitMQ log files
#   metric_collector - threshold triggers and the alarms that group them
#   aggregator       - alarm clusters built from those alarms
#
# NOTE(review): the nesting below was reconstructed from the key names after
# the original indentation was lost; confirm it against the consumer's schema.
log_collector:
  decoder:
    rabbitmq_log:
      engine: sandbox
      module_file: /usr/share/lma_collector/decoders/rabbitmq.lua
      # Two directories, separated by a semicolon.
      module_dir: /usr/share/lma_collector/common;/usr/share/heka/lua_modules
      adjust_timezone: true
  input:
    rabbitmq_log:
      engine: logstreamer
      log_directory: "/var/log/rabbitmq"
      # 'Node' in the differentiator below matches the named capture group
      # defined in this pattern.
      file_match: 'rabbit@(?P<Node>.+)\.log$'
      differentiator: ["rabbitmq.", "Node"]
      decoder: "rabbitmq_log_decoder"
      splitter: "rabbitmq_log_splitter"
  splitter:
    rabbitmq_log:
      engine: regex
      # Single-quoted so that the backslashes reach the consumer literally
      # instead of being processed as YAML escapes.
      delimiter: '\n\n(=[^=]+====)'
      delimiter_eol: false

metric_collector:
  # Each trigger holds a list of rules; a rule names a metric, an aggregation
  # function, a relational operator and a threshold.
  # NOTE(review): 'window' and 'periods' units are not stated in this file;
  # confirm them with the consumer's documentation.
  trigger:
    rabbitmq_disk_limit_critical:
      description: 'RabbitMQ has reached the free disk threshold. All producers are blocked.'
      severity: 'critical'
      no_data_policy: 'okay'
      rules:
        - metric: rabbitmq_remaining_disk
          relational_operator: '<='
          threshold: 0
          window: 20
          periods: 0
          function: min
    rabbitmq_disk_limit_warning:
      description: 'RabbitMQ is getting close to the free disk threshold.'
      severity: 'warning'
      no_data_policy: 'okay'
      rules:
        - metric: rabbitmq_remaining_disk
          relational_operator: '<='
          threshold: 104857600  # 100 MiB (100 * 1024 * 1024)
          window: 20
          periods: 0
          function: min
    rabbitmq_memory_limit_critical:
      description: 'RabbitMQ has reached the memory threshold. All producers are blocked.'
      severity: 'critical'
      no_data_policy: 'okay'
      rules:
        - metric: rabbitmq_remaining_memory
          relational_operator: '<='
          threshold: 0
          window: 20
          periods: 0
          function: min
    rabbitmq_memory_limit_warning:
      description: 'RabbitMQ is getting close to the memory threshold.'
      severity: 'warning'
      no_data_policy: 'okay'
      rules:
        - metric: rabbitmq_remaining_memory
          relational_operator: '<='
          threshold: 104857600  # 100 MiB (100 * 1024 * 1024)
          window: 20
          periods: 0
          function: min
    rabbitmq_queue_warning:
      description: 'The number of outstanding messages is too high.'
      severity: 'warning'
      no_data_policy: 'okay'
      rules:
        - metric: rabbitmq_messages
          relational_operator: '>='
          threshold: 200
          window: 120
          periods: 0
          function: avg
    # NOTE(review): unlike the other triggers, this one sets no
    # 'no_data_policy'; confirm that relying on the default is intentional.
    rabbitmq_check:
      description: 'RabbitMQ cannot be checked'
      severity: 'down'
      rules:
        - metric: rabbitmq_check
          relational_operator: '=='
          threshold: 0
          window: 60
          periods: 0
          function: last
  # Alarms group the triggers defined above. The three rabbitmq_server_*
  # alarms carry the 'service: rabbitmq-cluster' dimension that the
  # 'rabbitmq_cluster' alarm cluster matches on.
  alarm:
    rabbitmq_server_disk:
      alerting: enabled
      triggers:
        - rabbitmq_disk_limit_critical
        - rabbitmq_disk_limit_warning
      dimension:
        service: rabbitmq-cluster
    rabbitmq_server_memory:
      alerting: enabled
      triggers:
        - rabbitmq_memory_limit_critical
        - rabbitmq_memory_limit_warning
      dimension:
        service: rabbitmq-cluster
    rabbitmq_server_queue:
      alerting: enabled
      triggers:
        - rabbitmq_queue_warning
      dimension:
        service: rabbitmq-cluster
    # This alarm defines no dimension; the 'rabbitmq_service' alarm cluster
    # matches it by member name instead.
    rabbitmq_check:
      alerting: enabled
      triggers:
        - rabbitmq_check

aggregator:
  alarm_cluster:
    rabbitmq_cluster:
      alerting: enabled
      policy: highest_severity
      # A 'hostname' group_by is required because an alarm on a single node
      # has an impact on the whole cluster.
      group_by: hostname
      match:
        service: rabbitmq-cluster
      members:
        - rabbitmq_server_disk
        - rabbitmq_server_memory
        - rabbitmq_server_queue
      dimension:
        service: rabbitmq
        nagios_host: 01-service-clusters
    rabbitmq_service:
      # A check failure on a single node doesn't mean that the whole cluster
      # is down; this is why a 'hostname' group_by and the
      # 'availability_of_members' policy are used here.
      policy: availability_of_members
      alerting: enabled
      group_by: hostname
      match:
        member: rabbitmq_check
      members:
        - rabbitmq_check
      dimension:
        service: rabbitmq
        nagios_host: 01-service-clusters
    # Top-level cluster: combines the two clusters above, both of which carry
    # the 'service: rabbitmq' dimension matched here.
    rabbitmq:
      policy: highest_severity
      alerting: enabled_with_notification
      match:
        service: rabbitmq
      members:
        - rabbitmq_cluster
        - rabbitmq_service
      dimension:
        cluster_name: rabbitmq
        nagios_host: 00-top-clusters