Merge pull request #23 from tcpcloud/revert-22-stacklight-alarming
Revert "[WIP] Add alarming support"
diff --git a/heka/_service.sls b/heka/_service.sls
index 8c31e7e..9e649b4 100644
--- a/heka/_service.sls
+++ b/heka/_service.sls
@@ -76,42 +76,34 @@
'log_collector': {
'decoder': {},
'input': {},
- 'trigger': {},
- 'alarm': {},
'filter': {},
'splitter': {},
'encoder': {},
- 'output': {},
+ 'output': {}
},
'metric_collector': {
'decoder': {},
'input': {},
- 'trigger': {},
- 'alarm': {},
'filter': {},
'splitter': {},
'encoder': {},
- 'output': {},
+ 'output': {}
},
'remote_collector': {
'decoder': {},
'input': {},
- 'trigger': {},
- 'alarm': {},
'filter': {},
'splitter': {},
'encoder': {},
- 'output': {},
+ 'output': {}
},
'aggregator': {
'decoder': {},
'input': {},
- 'trigger': {},
- 'alarm': {},
'filter': {},
'splitter': {},
'encoder': {},
- 'output': {},
+ 'output': {}
}
} %}
@@ -238,44 +230,6 @@
{%- endfor %}
-{%- for alarm_name, alarm in service_metadata.get('alarm', {}).iteritems() %}
-
-/etc/{{ service_name }}/filter_afd_{{ alarm_name }}.toml:
- file.managed:
- - source: salt://heka/files/toml/filter/afd_alarm.toml
- - template: jinja
- - mode: 640
- - group: heka
- - require:
- - file: heka_{{ service_name }}_conf_dir
- - require_in:
- - file: heka_{{ service_name }}_conf_dir_clean
- - watch_in:
- - service: heka_{{ service_name }}_service
- - defaults:
- alarm_name: {{ alarm_name }}
- alarm: {{ alarm|yaml }}
- trigger: {{ service_metadata.get('trigger', {})|yaml }}
-
-/usr/share/lma_collector/common/lma_{{ alarm_name }}.lua:
- file.managed:
- - source: salt://heka/files/toml/filter/lma_alarm.lua
- - template: jinja
- - mode: 640
- - group: heka
- - require:
- - file: heka_{{ service_name }}_conf_dir
- - require_in:
- - file: heka_{{ service_name }}_conf_dir_clean
- - watch_in:
- - service: heka_{{ service_name }}_service
- - defaults:
- alarm_name: {{ alarm_name }}
- alarm: {{ alarm|yaml }}
- trigger: {{ service_metadata.get('trigger', {})|yaml }}
-
-{%- endfor %}
-
{%- for filter_name, filter in service_metadata.get('filter', {}).iteritems() %}
/etc/{{ service_name }}/filter_{{ filter_name }}.toml:
diff --git a/heka/files/lua/common/afd.lua b/heka/files/lua/common/afd.lua
index 274f6e2..9e864df 100644
--- a/heka/files/lua/common/afd.lua
+++ b/heka/files/lua/common/afd.lua
@@ -129,7 +129,8 @@
end
-- inject an AFD event into the Heka pipeline
-function inject_afd_metric(value, hostname, afd_name, to_alerting)
+function inject_afd_metric(msg_type, msg_tag_name, msg_tag_value, metric_name,
+ value, hostname, interval, source, to_alerting)
local payload
if #alarms > 0 then
@@ -149,22 +150,32 @@
end
local msg = {
- Type = 'afd_metric',
+ Type = msg_type,
Payload = payload,
Fields = {
- name='status',
+ name=metric_name,
value=value,
hostname=hostname,
- member=afd_name,
- tag_fields={'hostname', 'member'},
- no_alerting=no_alerting,
+ interval=interval,
+ source=source,
+ tag_fields={msg_tag_name, 'source', 'hostname'},
+ no_alerting = no_alerting,
}
}
-
+ msg.Fields[msg_tag_name] = msg_tag_value
lma.inject_tags(msg)
lma.safe_inject_message(msg)
end
+-- inject an AFD service event into the Heka pipeline
+function inject_afd_service_metric(service, value, hostname, interval, source)
+ inject_afd_metric('afd_service_metric',
+ 'service',
+ service,
+ 'service_status',
+ value, hostname, interval, source)
+end
+
MATCH = 1
NO_MATCH = 2
NO_DATA = 3
diff --git a/heka/files/lua/filters/afd.lua b/heka/files/lua/filters/afd.lua
index d5d73cb..bf10322 100644
--- a/heka/files/lua/filters/afd.lua
+++ b/heka/files/lua/filters/afd.lua
@@ -17,11 +17,34 @@
local utils = require 'lma_utils'
local afd = require 'afd'
-local afd_file = read_config('afd_file') or error('afd_file must be specified')
-local afd_name = read_config('afd_name') or error('afd_name must be specified')
-local hostname = read_config('hostname') or error('hostname must be specified')
+-- AFD type: must be either 'node' or 'service'
+local afd_type = read_config('afd_type') or error('afd_type must be specified!')
local activate_alerting = read_config('activate_alerting') or true
+local msg_type
+local msg_field_name
+local afd_entity
+if afd_type == 'node' then
+ msg_type = 'afd_node_metric'
+ msg_field_name = 'node_status'
+ afd_entity = 'node_role'
+elseif afd_type == 'service' then
+ msg_type = 'afd_service_metric'
+ msg_field_name = 'service_status'
+ afd_entity = 'service'
+else
+ error('invalid afd_type value')
+end
+
+-- e.g. 'controller' for a node AFD, 'rabbitmq' for a service AFD
+local afd_entity_value = read_config('afd_cluster_name') or error('afd_cluster_name must be specified!')
+
+-- e.g. 'cpu' for a node AFD, 'queue' for a service AFD
+local msg_field_source = read_config('afd_logical_name') or error('afd_logical_name must be specified!')
+
+local hostname = read_config('hostname') or error('hostname must be specified')
+
+local afd_file = read_config('afd_file') or error('afd_file must be specified')
local all_alarms = require(afd_file)
local A = require 'afd_alarms'
A.load_alarms(all_alarms)
@@ -67,7 +90,8 @@
alarm.alert.message)
end
- afd.inject_afd_metric(state, hostname, afd_name, activate_alerting)
+ afd.inject_afd_metric(msg_type, afd_entity, afd_entity_value, msg_field_name,
+ state, hostname, interval, msg_field_source, activate_alerting)
end
else
A.set_start_time(ns)
diff --git a/heka/files/toml/filter/afd_alarm.toml b/heka/files/toml/filter/afd_alarm.toml
deleted file mode 100644
index a17afe2..0000000
--- a/heka/files/toml/filter/afd_alarm.toml
+++ /dev/null
@@ -1,30 +0,0 @@
-{# Find the metrics involved in this alarm and build the list of message matchers #}
-{%- set _matchers = [] %}
-{%- for _trigger_name in alarm.triggers %}
- {%- set _trigger = trigger.get(_trigger_name, {}) %}
- {%- if _trigger.get('enabled', True) %}
- {%- for _rule in _trigger.get('rules', []) %}
- {%- set _matcher = "Fields[name] == '%s'" % _rule.metric %}
- {%- if not _matcher in _matchers %}
- {%- do _matchers.append("Fields[name] == '%s'" % _rule.metric) %}
- {%- endif %}
- {%- endfor %}
- {%- endif %}
-{%- endfor -%}
-
-[afd_{{ alarm_name }}_filter]
-type = "SandboxFilter"
-filename = "/usr/share/lma_collector/filters/afd.lua"
-{%- if alarm.preserve_data is defined %}
-preserve_data = {{ alarm.preserve_data|lower }}
-{%- else %}
-preserve_data = false
-{%- endif %}
-message_matcher = "(Type == 'metric' || Type == 'heka.sandbox.metric') && ({{ _matchers|join(' || ') }})"
-module_directory = "/usr/share/lma_collector/common;/usr/share/heka/lua_modules"
-ticker_interval = 10
-
-[afd_{{ alarm_name }}_filter.config]
-afd_file = 'lma_{{ alarm_name }}'
-afd_name = '{{ alarm_name }}'
-hostname = '{{ grains.host }}'
diff --git a/heka/files/toml/filter/lma_alarm.lua b/heka/files/toml/filter/lma_alarm.lua
deleted file mode 100644
index 677c606..0000000
--- a/heka/files/toml/filter/lma_alarm.lua
+++ /dev/null
@@ -1,47 +0,0 @@
-local M = {}
-setfenv(1, M) -- Remove external access to contain everything in the module
-
-local alarms = {
-{%- for _trigger_name in alarm.triggers %}
-{%- set _trigger = trigger.get(_trigger_name, {}) %}
-{%- if _trigger.get('enabled', True) %}
- {
- ['name'] = '{{ _trigger_name}}',
- ['description'] = '{{ _trigger.get("description", "").replace("'", "\\'") }}',
- ['severity'] = '{{ _trigger.severity }}',
- {%- if _trigger.no_data_policy is defined %}
- ['no_data_policy'] = '{{ _trigger.no_data_policy }}',
- {%- endif %}
- ['trigger'] = {
- ['logical_operator'] = '{{ _trigger.get("logical_operator", "or") }}',
- ['rules'] = {
- {%- for _rule in _trigger.get('rules', []) %}
- {
- ['metric'] = '{{ _rule.metric }}',
- ['fields'] = {
- {%- for _field_name, _field_value in _rule.get('field', {}).iteritems() %}
- ['{{ _field_name }}'] = '{{ _field_value }}',
- {%- endfor %}
- },
- ['relational_operator'] = '{{ _rule.relational_operator }}',
- ['threshold'] = '{{ _rule.threshold }}',
- ['window'] = '{{ _rule.window }}',
- ['periods'] = '{{ _rule.get('periods', 0) }}',
- ['function'] = '{{ _rule.function }}',
- {%- if _rule.group_by is defined %}
- ['group_by'] = {
- {%- for _group_by in rule.group_by %}
- {{ _group_by }},
- {%- endfor %}
- },
- {%- endif %}
- },
- {%- endfor %}
- },
- },
- },
-{%- endif %}
-{%- endfor %}
-}
-
-return alarms