Initial commit
diff --git a/CHANGELOG.rst b/CHANGELOG.rst
new file mode 100644
index 0000000..9ea5ae2
--- /dev/null
+++ b/CHANGELOG.rst
@@ -0,0 +1,6 @@
+sensu-formula
+=============
+
+0.0.1 (2015-08-03)
+
+- Initial formula setup
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..6f2b42f
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,13 @@
+Copyright (c) 2014-2015 tcp cloud a.s.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
\ No newline at end of file
diff --git a/README.rst b/README.rst
new file mode 100644
index 0000000..1fd88db
--- /dev/null
+++ b/README.rst
@@ -0,0 +1,114 @@
+
+=====
+Sensu
+=====
+
+Sample pillars
+==============
+
+Sensu Server with API
+
+.. code-block:: yaml
+
+ sensu:
+ server:
+ enabled: true
+ keepalive_warning: 20
+ keepalive_critical: 60
+ mine_checks: true
+ database:
+ engine: redis
+ host: localhost
+ port: 6379
+ message_queue:
+ engine: rabbitmq
+ host: rabbitmq
+ port: 5672
+ user: monitor
+ password: pwd
+ virtual_host: '/monitor'
+ bind:
+ address: 0.0.0.0
+ port: 4567
+ handler:
+ default:
+ enabled: true
+ set:
+ - mail
+ stdout:
+ enabled: true
+ mail:
+ mail_to: 'mail@domain.cz'
+ host: smtp1.domain.cz
+ port: 465
+ user: 'mail@domain.cz'
+ password: 'pwd'
+ authentication: cram_md5
+ encryption: ssl
+ domain: 'domain.cz'
+
+Sensu Dashboard (now Uchiwa)
+
+.. code-block:: yaml
+
+ sensu:
+ dashboard:
+ enabled: true
+ bind:
+ address: 0.0.0.0
+ port: 8080
+ admin:
+ username: admin
+ password: pass
+
+Sensu Client
+
+.. code-block:: yaml
+
+ sensu:
+ client:
+ enabled: true
+ message_queue:
+ engine: rabbitmq
+ host: rabbitmq
+ port: 5672
+ user: monitor
+ password: pwd
+ virtual_host: '/monitor'
+
+Sensu Client with community plugins
+
+.. code-block:: yaml
+
+ sensu:
+ client:
+ enabled: true
+ plugin:
+ sensu_community_plugins:
+ enabled: true
+ monitoring_for_openstack:
+ enabled: true
+ message_queue:
+ engine: rabbitmq
+ host: rabbitmq
+ port: 5672
+ user: monitor
+ password: pwd
+ virtual_host: '/monitor'
+
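+Sensu server with manually defined checks. This is only a sketch: the pillar keys
+mirror those consumed by the bundled check_manual.json template, and the check
+name, command and subscriber below are purely illustrative.
+
+.. code-block:: yaml
+
+    sensu:
+      server:
+        enabled: true
+        checks:
+        - name: remote_ping_gateway
+          command: "check_ping -H 10.0.0.1 -w 100,10% -c 200,20%"
+          subscribers:
+          - network
+          interval: 60
+          occurrences: 3
+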
+Read more
+=========
+
+* http://docs.sensuapp.org/0.9/installing_sensu.html
+* https://speakerdeck.com/joemiller/practical-examples-with-sensu-monitoring-framework
+* https://github.com/fridim/nagios-plugin-check_galera_cluster
+* http://www.reimann.sh/2011/06/30/nagios-check-pacemaker-failed-actions/
+* http://sys4.de/en/blog/2014/01/23/montoring-pacemaker-nagios/
+* https://raw.githubusercontent.com/sensu/sensu-community-plugins/master/plugins/openstack/neutron/neutron-agent-status.py
+* https://github.com/sensu/sensu-community-plugins/blob/master/plugins/openstack/keystone/check_keystone-api.sh
+* http://openstack.prov12n.com/monitoring-openstack-nagios-3/
+* https://raw.githubusercontent.com/drewkerrigan/nagios-http-json/master/check_http_json.py
+* https://github.com/opinkerfi/nagios-plugins/tree/master/check_ibm_bladecenter
+* https://github.com/opinkerfi/nagios-plugins/tree/master/check_storwize
+* https://github.com/ehazlett/sensu-py/
+* https://github.com/Level-Up/Supervisord-Nagios-Plugin/blob/master/check_supv.py
diff --git a/VERSION b/VERSION
new file mode 100644
index 0000000..3b04cfb
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.2
diff --git a/debian/changelog b/debian/changelog
new file mode 100644
index 0000000..23ed40f
--- /dev/null
+++ b/debian/changelog
@@ -0,0 +1,11 @@
+salt-formula-sensu (0.2) trusty; urgency=medium
+
+ * First public release
+
+ -- Filip Pytloun <filip.pytloun@tcpcloud.eu> Tue, 06 Oct 2015 16:38:53 +0200
+
+salt-formula-sensu (0.1) trusty; urgency=medium
+
+ * Initial release
+
+ -- Ales Komarek <ales.komarek@tcpcloud.eu> Thu, 13 Aug 2015 23:23:41 +0200
diff --git a/debian/compat b/debian/compat
new file mode 100644
index 0000000..ec63514
--- /dev/null
+++ b/debian/compat
@@ -0,0 +1 @@
+9
diff --git a/debian/control b/debian/control
new file mode 100644
index 0000000..f38b13e
--- /dev/null
+++ b/debian/control
@@ -0,0 +1,15 @@
+Source: salt-formula-sensu
+Maintainer: Ales Komarek <ales.komarek@tcpcloud.eu>
+Section: admin
+Priority: optional
+Build-Depends: debhelper (>= 9)
+Standards-Version: 3.9.6
+Homepage: http://www.tcpcloud.eu
+Vcs-Browser: https://github.com/tcpcloud/salt-formula-sensu
+Vcs-Git: https://github.com/tcpcloud/salt-formula-sensu.git
+
+Package: salt-formula-sensu
+Architecture: all
+Depends: ${misc:Depends}, salt-master, reclass
+Description: Sensu salt formula
+ Configure the Sensu monitoring system.
diff --git a/debian/copyright b/debian/copyright
new file mode 100644
index 0000000..04a5420
--- /dev/null
+++ b/debian/copyright
@@ -0,0 +1,15 @@
+Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
+Upstream-Name: salt-formula-sensu
+Upstream-Contact: Ales Komarek <ales.komarek@tcpcloud.eu>
+Source: https://github.com/tcpcloud/salt-formula-sensu
+
+Files: *
+Copyright: 2014-2015 tcp cloud a.s.
+License: Apache-2.0
+ Copyright (C) 2014-2015 tcp cloud a.s.
+ .
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ .
+ On a Debian system you can find a copy of this license in
+ /usr/share/common-licenses/Apache-2.0.
diff --git a/debian/docs b/debian/docs
new file mode 100644
index 0000000..d585829
--- /dev/null
+++ b/debian/docs
@@ -0,0 +1,3 @@
+README.rst
+CHANGELOG.rst
+VERSION
diff --git a/debian/install b/debian/install
new file mode 100644
index 0000000..8317097
--- /dev/null
+++ b/debian/install
@@ -0,0 +1,2 @@
+sensu/* /usr/share/salt-formulas/env/sensu/
+metadata/service/* /usr/share/salt-formulas/reclass/service/sensu/
diff --git a/debian/rules b/debian/rules
new file mode 100755
index 0000000..abde6ef
--- /dev/null
+++ b/debian/rules
@@ -0,0 +1,5 @@
+#!/usr/bin/make -f
+
+%:
+ dh $@
+
diff --git a/debian/source/format b/debian/source/format
new file mode 100644
index 0000000..89ae9db
--- /dev/null
+++ b/debian/source/format
@@ -0,0 +1 @@
+3.0 (native)
diff --git a/metadata/service/client/init.yml b/metadata/service/client/init.yml
new file mode 100644
index 0000000..4a7aa4e
--- /dev/null
+++ b/metadata/service/client/init.yml
@@ -0,0 +1,17 @@
+applications:
+- sensu
+parameters:
+ sensu:
+ client:
+ enabled: true
+ message_queue:
+ engine: rabbitmq
+ host: ${_param:sensu_message_queue_host}
+ port: 5672
+ user: monitor
+ password: ${_param:rabbitmq_monitor_password}
+ virtual_host: '/monitor'
+ metric_handlers:
+ - graphite
+ check_handlers:
+ - default
diff --git a/metadata/service/server/handler_statsd.yml b/metadata/service/server/handler_statsd.yml
new file mode 100644
index 0000000..dafc495
--- /dev/null
+++ b/metadata/service/server/handler_statsd.yml
@@ -0,0 +1,11 @@
+parameters:
+ sensu:
+ server:
+ handler:
+ default:
+ set:
+ - statsd
+ statsd:
+ enabled: true
+ host: 127.0.0.1
+ port: 8125
diff --git a/metadata/service/server/handler_stdout.yml b/metadata/service/server/handler_stdout.yml
new file mode 100644
index 0000000..f0f77b2
--- /dev/null
+++ b/metadata/service/server/handler_stdout.yml
@@ -0,0 +1,9 @@
+parameters:
+ sensu:
+ server:
+ handler:
+ default:
+ set:
+ - stdout
+ stdout:
+ enabled: true
diff --git a/metadata/service/server/single.yml b/metadata/service/server/single.yml
new file mode 100644
index 0000000..dcc3134
--- /dev/null
+++ b/metadata/service/server/single.yml
@@ -0,0 +1,27 @@
+applications:
+- sensu
+parameters:
+ sensu:
+ server:
+ enabled: true
+ database:
+ engine: redis
+ host: 127.0.0.1
+ port: 6379
+ message_queue:
+ engine: rabbitmq
+ host: ${_param:sensu_message_queue_host}
+ port: 5672
+ user: monitor
+ password: ${_param:rabbitmq_monitor_password}
+ virtual_host: '/monitor'
+ bind:
+ address: 0.0.0.0
+ port: 4567
+ handler:
+ default:
+ enabled: true
+ metric_handlers:
+ - graphite
+ check_handlers:
+ - default
\ No newline at end of file
diff --git a/sensu/_common.sls b/sensu/_common.sls
new file mode 100644
index 0000000..3376acc
--- /dev/null
+++ b/sensu/_common.sls
@@ -0,0 +1,38 @@
+
+{#
+{%- if grains.os_family == 'Debian' %}
+
+sensu_repo:
+ pkgrepo.managed:
+ - human_name: Sensu
+ - name: deb http://repos.sensuapp.org/apt sensu main
+ - file: /etc/apt/sources.list.d/sensu.list
+ - key_url: salt://sensu/conf/sensu-apt.gpg
+
+{%- elif grains.os_family == 'RedHat' %}
+
+sensu_repo:
+ pkgrepo.managed:
+ - name: sensu
+ - humanname: sensu-main
+ - baseurl: http://repos.sensuapp.org/yum/el/$releasever/$basearch/
+ - gpgcheck: 0
+
+{%- endif %}
+#}
+
+/etc/sensu:
+ file.directory:
+ - user: sensu
+ - group: sensu
+ - mode: 755
+ - makedirs: true
+
+/srv/sensu:
+ file.directory:
+ - user: root
+ - group: root
+ - mode: 755
+ - makedirs: true
+ - require:
+ - file: /etc/sensu
diff --git a/sensu/client.sls b/sensu/client.sls
new file mode 100644
index 0000000..8c48480
--- /dev/null
+++ b/sensu/client.sls
@@ -0,0 +1,107 @@
+{%- from "sensu/map.jinja" import client with context %}
+{%- if client.enabled %}
+
+include:
+- sensu._common
+
+sensu_client_packages:
+ pkg.installed:
+ - names: {{ client.pkgs }}
+ - require_in:
+ - file: /etc/sensu
+
+/etc/sensu/plugins:
+ file.recurse:
+ - clean: true
+ - source: salt://sensu/files/checks
+ - user: sensu
+ - group: sensu
+ - file_mode: 755
+ - dir_mode: 755
+ - makedirs: true
+ - require:
+ - file: /srv/sensu
+
+{%- for plugin_name, plugin in client.plugin.iteritems() %}
+{%- if plugin.enabled %}
+
+{%- if plugin_name == 'sensu_community_plugins' %}
+
+sensu_client_community_plugins:
+ gem.installed:
+ - names:
+ - sensu-plugin
+
+{%- endif %}
+
+{%- if plugin_name == 'monitoring_for_openstack' %}
+
+sensu_monitor_openstack_six:
+ pip.installed:
+ - name: six>=1.9.0
+
+sensu_monitor_openstack_source:
+ git.latest:
+ - name: https://github.com/stackforge/monitoring-for-openstack.git
+ - target: /root/monitoring-for-openstack
+ - rev: master
+ - require:
+ - pip: sensu_monitor_openstack_six
+
+sensu_monitor_openstack_install:
+ cmd.run:
+ - name: python setup.py install
+ - cwd: /root/monitoring-for-openstack
+ - unless: pip freeze | grep monitoring-for-openstack
+ - require:
+ - git: sensu_monitor_openstack_source
+
+{%- endif %}
+
+{%- endif %}
+{%- endfor %}
+
+sensu_client_check_grains:
+ file.managed:
+ - name: /etc/salt/grains
+ - source: salt://sensu/files/checks.grain
+ - template: jinja
+ - mode: 600
+ - require:
+ - pkg: sensu_client_packages
+
+/etc/sensu/conf.d/rabbitmq.json:
+ file.managed:
+ - source: salt://sensu/files/rabbitmq.json
+ - template: jinja
+ - mode: 644
+ - require:
+ - file: /etc/sensu
+ - watch_in:
+ - service: service_sensu_client
+
+/etc/sensu/conf.d/client.json:
+ file.managed:
+ - source: salt://sensu/files/client.json
+ - template: jinja
+ - mode: 644
+ - require:
+ - file: /etc/sensu
+ - watch_in:
+ - service: service_sensu_client
+
+service_sensu_client:
+ service.running:
+ - name: sensu-client
+ - enable: true
+ - require:
+ - pkg: sensu_client_packages
+
+/etc/sudoers.d/90-sensu-user:
+ file.managed:
+ - source: salt://sensu/files/sudoer
+ - user: root
+ - group: root
+ - mode: 440
+
+{%- endif %}
diff --git a/sensu/dashboard.sls b/sensu/dashboard.sls
new file mode 100644
index 0000000..5416d65
--- /dev/null
+++ b/sensu/dashboard.sls
@@ -0,0 +1,29 @@
+{%- from "sensu/map.jinja" import dashboard with context %}
+{%- if dashboard.enabled %}
+
+include:
+- sensu._common
+
+sensu_dashboard_packages:
+ pkg.installed:
+ - names: {{ dashboard.pkgs }}
+ - require_in:
+ - file: /etc/sensu
+ - service: service_sensu_dashboard
+
+/etc/sensu/uchiwa.json:
+ file.managed:
+ - source: salt://sensu/files/uchiwa.json
+ - template: jinja
+ - mode: 644
+ - require:
+ - file: /etc/sensu
+ - watch_in:
+ - service: service_sensu_dashboard
+
+service_sensu_dashboard:
+ service.running:
+ - name: uchiwa
+ - enable: true
+
+{%- endif %}
\ No newline at end of file
diff --git a/sensu/files/api.json b/sensu/files/api.json
new file mode 100644
index 0000000..ec290ec
--- /dev/null
+++ b/sensu/files/api.json
@@ -0,0 +1,7 @@
+{%- from "sensu/map.jinja" import server with context -%}
+{
+ "api": {
+ "host": "{{ server.bind.address }}",
+ "port": {{ server.bind.port }}
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/check.json b/sensu/files/check.json
new file mode 100644
index 0000000..5f0efee
--- /dev/null
+++ b/sensu/files/check.json
@@ -0,0 +1,50 @@
+{
+ "checks": {
+ "{{ check_name }}": {
+ "subscribers": [
+ {%- for subscriber in check.subscribers %}
+ "{{ subscriber }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ ],
+ "command": "{{ check.command }}",
+ {%- if check.notification is defined %}
+ "notification": "{{ check.notification }}",
+ {%- endif %}
+ {%- if check.asset is defined %}
+ "asset": "{{ check.asset }}",
+ {%- endif %}
+ {%- if check.customer is defined %}
+ "customer": "{{ check.customer }}",
+ {%- endif %}
+ {%- if check.standalone is defined %}
+ {%- if check.standalone %}
+ "standalone": true,
+ {%- else %}
+ "standalone": false,
+ {%- endif %}
+ {%- endif %}
+
+ {%- if check.type is defined %}
+ "handlers": [
+ {%- for handler in pillar.sensu.server.metric_handlers %}
+ "{{ handler }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ ],
+ "type": "{{ check.type }}",
+ {%- else %}
+ "handlers": [
+ "default"
+ {#
+ {%- for handler in pillar.sensu.server.check_handlers %}
+ "{{ handler }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ #}
+ ],
+ {%- endif %}
+ {%- if check.occurrences is defined %}
+ "occurrences": {{ check.occurrences }},
+ {%- endif %}
+ "interval": {{ check.interval }}
+ }
+ }
+}
diff --git a/sensu/files/check_manual.json b/sensu/files/check_manual.json
new file mode 100644
index 0000000..266789c
--- /dev/null
+++ b/sensu/files/check_manual.json
@@ -0,0 +1,54 @@
+{%- for check in pillar.sensu.server.checks %}
+{%- if check_name == check.name %}
+{
+ "checks": {
+ "{{ check.name }}": {
+ "subscribers": [
+ {%- for subscriber in check.subscribers %}
+ "{{ subscriber }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ ],
+ "command": "{{ check.command }}",
+ {%- if check.notification is defined %}
+ "notification": "{{ check.notification }}",
+ {%- endif %}
+ {%- if check.asset is defined %}
+ "asset": "{{ check.asset }}",
+ {%- endif %}
+ {%- if check.customer is defined %}
+ "customer": "{{ check.customer }}",
+ {%- endif %}
+ {%- if check.standalone is defined %}
+ {%- if check.standalone %}
+ "standalone": true,
+ {%- else %}
+ "standalone": false,
+ {%- endif %}
+ {%- endif %}
+
+ {%- if check.type is defined %}
+ "handlers": [
+ {%- for handler in pillar.sensu.server.metric_handlers %}
+ "{{ handler }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ ],
+ "type": "{{ check.type }}",
+ {%- else %}
+ "handlers": [
+ "default"
+ {#
+ {%- for handler in pillar.sensu.server.check_handlers %}
+ "{{ handler }}"{% if not loop.last %},{% endif %}
+ {%- endfor %}
+ #}
+ ],
+ {%- endif %}
+ {%- if check.occurrences is defined %}
+ "occurrences": {{ check.occurrences }},
+ {%- endif %}
+ "interval": {{ check.interval }}
+ }
+ }
+}
+{%- endif %}
+{%- endfor %}
\ No newline at end of file
diff --git a/sensu/files/checks.grain b/sensu/files/checks.grain
new file mode 100644
index 0000000..8b96b74
--- /dev/null
+++ b/sensu/files/checks.grain
@@ -0,0 +1,10 @@
+sensu_checks:
+{%- from "sensu/map.jinja" import client with context %}
+{%- for service in client.supported_services %}
+{%- if service in grains.get('services', []) %}
+{%- set service_checks_file = service+'/files/sensu.conf' %}
+{%- macro indent_service_checks() %}{% include service_checks_file %}{% endmacro %}
+{{ indent_service_checks()|indent(2, true) }}
+
+{%- endif %}
+{%- endfor %}
\ No newline at end of file
diff --git a/sensu/files/checks/check-dns.rb b/sensu/files/checks/check-dns.rb
new file mode 100644
index 0000000..5b868bb
--- /dev/null
+++ b/sensu/files/checks/check-dns.rb
@@ -0,0 +1,107 @@
+#! /usr/bin/env ruby
+#
+# check-dns
+#
+# DESCRIPTION:
+# This plugin checks DNS resolution using `dig`.
+# Note: if testing reverse DNS with -t PTR option,
+# results will end with trailing '.' (dot)
+#
+# OUTPUT:
+# plain text
+#
+# PLATFORMS:
+# Linux, BSD
+#
+# DEPENDENCIES:
+# gem: sensu-plugin
+#
+# USAGE:
+#   check-dns.rb -d example.com -s 8.8.8.8   (illustrative values)
+#
+# NOTES:
+# Does it behave differently on specific platforms, specific use cases, etc
+#
+# LICENSE:
+# Copyright 2014 Sonian, Inc. and contributors. <support@sensuapp.org>
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-plugin/check/cli'
+
+class DNS < Sensu::Plugin::Check::CLI
+ option :domain,
+ description: 'Domain to resolve (or ip if type PTR)',
+ short: '-d DOMAIN',
+ long: '--domain DOMAIN'
+
+ option :type,
+ description: 'Record type to resolve (A, AAAA, TXT, etc) use PTR for reverse lookup',
+ short: '-t RECORD',
+ long: '--type RECORD',
+ default: 'A'
+
+ option :server,
+ description: 'Server to use for resolution',
+ short: '-s SERVER',
+ long: '--server SERVER'
+
+ option :result,
+ description: 'A positive result entry',
+ short: '-r RESULT',
+ long: '--result RESULT'
+
+ option :warn_only,
+ description: 'Warn instead of critical on failure',
+ short: '-w',
+ long: '--warn-only',
+ boolean: true
+
+ option :debug,
+ description: 'Print debug information',
+ long: '--debug',
+ boolean: true
+
+ def resolve_domain
+ if config[:type] == 'PTR'
+ cmd = "dig #{config[:server] ? "@#{config[:server]}" : ''} -x #{config[:domain]} +short +time=1"
+ else
+ cmd = "dig #{config[:server] ? "@#{config[:server]}" : ''} #{config[:domain]} #{config[:type]} +short +time=1"
+ end
+ puts cmd if config[:debug]
+ output = `#{cmd}`
+ puts output if config[:debug]
+ # Trim, split, remove comments and empty lines
+ entries = output.strip.split("\n").reject { |l| l.match('^;') || l.match('^$') }
+ puts "Entries: #{entries}" if config[:debug]
+ entries
+ end
+
+ def run
+ if config[:domain].nil?
+ unknown 'No domain specified'
+ else
+ entries = resolve_domain
+ if entries.length.zero?
+ if config[:warn_only]
+ warning "Could not resolve #{config[:domain]}"
+ else
+ critical "Could not resolve #{config[:domain]}"
+ end
+ else
+ if config[:result]
+ # #YELLOW
+ if entries.include?(config[:result]) # rubocop:disable BlockNesting
+ ok "Resolved #{config[:domain]} including #{config[:result]}"
+ else
+ critical "Resolved #{config[:domain]} did not include #{config[:result]}"
+ end
+ else
+ ok "Resolved #{config[:domain]} #{config[:type]} records"
+ end
+ end
+ end
+ end
+end
diff --git a/sensu/files/checks/check-haproxy.rb b/sensu/files/checks/check-haproxy.rb
new file mode 100644
index 0000000..6219d26
--- /dev/null
+++ b/sensu/files/checks/check-haproxy.rb
@@ -0,0 +1,173 @@
+#! /usr/bin/env ruby
+#
+# check-haproxy.rb
+#
+# DESCRIPTION:
+#   Defaults to checking if ALL services in the given group are up;
+#   with -A, checks all groups.
+#
+# OUTPUT:
+# plain text
+#
+# PLATFORMS:
+# Linux
+#
+# DEPENDENCIES:
+# gem: sensu-plugin
+#
+# USAGE:
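+#   Illustrative invocations (socket path, host, credentials and service names are examples only):
+#     check-haproxy.rb -S /var/run/haproxy.sock -A
+#     check-haproxy.rb -S stats.example.com -P 8080 -q 'haproxy?stats' -u admin -p secret -s www_backend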
+#
+# LICENSE:
+# Copyright 2011 Sonian, Inc. and contributors. <support@sensuapp.org>
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+
+require 'sensu-plugin/check/cli'
+require 'net/http'
+require 'socket'
+require 'csv'
+require 'uri'
+
+#
+# Check HA Proxy
+#
+class CheckHAProxy < Sensu::Plugin::Check::CLI
+ option :stats_source,
+ short: '-S HOSTNAME|SOCKETPATH',
+ long: '--stats HOSTNAME|SOCKETPATH',
+ description: 'HAproxy web stats hostname or path to stats socket',
+ required: true
+ option :port,
+ short: '-P PORT',
+ long: '--port PORT',
+ description: 'HAproxy web stats port',
+ default: '80'
+ option :path,
+ short: '-q STATUSPATH',
+ long: '--statspath STATUSPATH',
+ description: 'HAproxy web stats path',
+ default: '/'
+ option :username,
+ short: '-u USERNAME',
+ long: '--user USERNAME',
+ description: 'HAproxy web stats username'
+ option :password,
+ short: '-p PASSWORD',
+ long: '--pass PASSWORD',
+ description: 'HAproxy web stats password'
+ option :warn_percent,
+ short: '-w PERCENT',
+ boolean: true,
+ default: 50,
+ proc: proc(&:to_i),
+ description: 'Warning Percent, default: 50'
+ option :crit_percent,
+ short: '-c PERCENT',
+ boolean: true,
+ default: 25,
+ proc: proc(&:to_i),
+ description: 'Critical Percent, default: 25'
+ option :session_warn_percent,
+ short: '-W PERCENT',
+ boolean: true,
+ default: 75,
+ proc: proc(&:to_i),
+ description: 'Session Limit Warning Percent, default: 75'
+ option :session_crit_percent,
+ short: '-C PERCENT',
+ boolean: true,
+ default: 90,
+ proc: proc(&:to_i),
+ description: 'Session Limit Critical Percent, default: 90'
+ option :all_services,
+ short: '-A',
+ boolean: true,
+ description: 'Check ALL Services, flag enables'
+ option :missing_ok,
+ short: '-m',
+ boolean: true,
+ description: 'Missing OK, flag enables'
+ option :service,
+ short: '-s SVC',
+ description: 'Service Name to Check'
+ option :exact_match,
+ short: '-e',
+ boolean: false,
+ description: 'Whether service name specified with -s should be exact match or not'
+
+ def run #rubocop:disable all
+ if config[:service] || config[:all_services]
+ services = acquire_services
+ else
+ unknown 'No service specified'
+ end
+
+ if services.empty?
+ message "No services matching /#{config[:service]}/"
+ if config[:missing_ok]
+ ok
+ else
+ warning
+ end
+ else
+ percent_up = 100 * services.count { |svc| svc[:status] == 'UP' || svc[:status] == 'OPEN' } / services.size
+ failed_names = services.reject { |svc| svc[:status] == 'UP' || svc[:status] == 'OPEN' }.map { |svc| svc[:svname] }
+ critical_sessions = services.select { |svc| svc[:slim].to_i > 0 && (100 * svc[:scur].to_f / svc[:slim].to_f) > config[:session_crit_percent] }
+ warning_sessions = services.select { |svc| svc[:slim].to_i > 0 && (100 * svc[:scur].to_f / svc[:slim].to_f) > config[:session_warn_percent] }
+
+ status = "UP: #{percent_up}% of #{services.size} /#{config[:service]}/ services" + (failed_names.empty? ? '' : ", DOWN: #{failed_names.join(', ')}")
+ if percent_up < config[:crit_percent]
+ critical status
+ elsif !critical_sessions.empty?
+ critical status + '; Active sessions critical: ' + critical_sessions.map { |s| "#{s[:scur]} #{s[:pxname]}.#{s[:svname]}" }.join(', ')
+ elsif percent_up < config[:warn_percent]
+ warning status
+ elsif !warning_sessions.empty?
+ warning status + '; Active sessions warning: ' + warning_sessions.map { |s| "#{s[:scur]} #{s[:pxname]}.#{s[:svname]}" }.join(', ')
+ else
+ ok status
+ end
+ end
+ end
+
+ def acquire_services #rubocop:disable all
+ uri = URI.parse(config[:stats_source])
+
+ if uri.is_a?(URI::Generic) && File.socket?(uri.path)
+ srv = UNIXSocket.open(config[:stats_source])
+ srv.write("show stat\n")
+ out = srv.read
+ srv.close
+ else
+ res = Net::HTTP.start(config[:stats_source], config[:port]) do |http|
+ req = Net::HTTP::Get.new("/#{config[:path]};csv;norefresh")
+ unless config[:username].nil?
+ req.basic_auth config[:username], config[:password]
+ end
+ http.request(req)
+ end
+ unless res.code.to_i == 200
+ unknown "Failed to fetch from #{config[:stats_source]}:#{config[:port]}/#{config[:path]}: #{res.code}"
+ end
+
+ out = res.body
+ end
+
+ parsed = CSV.parse(out, skip_blanks: true)
+ keys = parsed.shift.reject(&:nil?).map { |k| k.match(/(\w+)/)[0].to_sym }
+ haproxy_stats = parsed.map { |line| Hash[keys.zip(line)] }
+
+ if config[:all_services]
+ haproxy_stats
+ else
+ regexp = config[:exact_match] ? Regexp.new("^#{config[:service]}$") : Regexp.new("#{config[:service]}")
+ haproxy_stats.select do |svc|
+ svc[:pxname] =~ regexp
+ # #YELLOW
+ end.reject do |svc| # rubocop: disable MultilineBlockChain
+ %w(FRONTEND BACKEND).include?(svc[:svname])
+ end
+ end
+ end
+end
diff --git a/sensu/files/checks/check-rabbitmq-queue.rb b/sensu/files/checks/check-rabbitmq-queue.rb
new file mode 100644
index 0000000..be5d4da
--- /dev/null
+++ b/sensu/files/checks/check-rabbitmq-queue.rb
@@ -0,0 +1,117 @@
+#!/usr/bin/env ruby
+# encoding: UTF-8
+#
+# Check RabbitMQ Queue Messages
+# ===
+#
+# DESCRIPTION:
+#   This plugin checks the number of messages queued on the RabbitMQ server in specific queues
+#
+# PLATFORMS:
+# Linux, BSD, Solaris
+#
+# DEPENDENCIES:
+# RabbitMQ rabbitmq_management plugin
+# gem: sensu-plugin
+# gem: carrot-top
+#
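+# USAGE:
+#   Illustrative (queue names and thresholds are examples only):
+#     check-rabbitmq-queue.rb --host localhost --queue notifications,alerts -w 250 -c 500
+#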
+# LICENSE:
+# Copyright 2012 Evan Hazlett <ejhazlett@gmail.com>
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'sensu-plugin/check/cli'
+require 'socket'
+require 'carrot-top'
+
+# main plugin class
+class CheckRabbitMQMessages < Sensu::Plugin::Check::CLI
+ option :host,
+ description: 'RabbitMQ management API host',
+ long: '--host HOST',
+ default: 'localhost'
+
+ option :port,
+ description: 'RabbitMQ management API port',
+ long: '--port PORT',
+ proc: proc(&:to_i),
+ default: 15_672
+
+ option :ssl,
+ description: 'Enable SSL for connection to the API',
+ long: '--ssl',
+ boolean: true,
+ default: false
+
+ option :user,
+ description: 'RabbitMQ management API user',
+ long: '--user USER',
+ default: 'guest'
+
+ option :password,
+ description: 'RabbitMQ management API password',
+ long: '--password PASSWORD',
+ default: 'guest'
+
+ option :queue,
+ description: 'RabbitMQ queue to monitor',
+ long: '--queue queue_names',
+ required: true,
+ proc: proc { |a| a.split(',') }
+
+ option :warn,
+ short: '-w NUM_MESSAGES',
+ long: '--warn NUM_MESSAGES',
+ description: 'WARNING message count threshold',
+ default: 250
+
+ option :critical,
+ short: '-c NUM_MESSAGES',
+ long: '--critical NUM_MESSAGES',
+ description: 'CRITICAL message count threshold',
+ default: 500
+
+ def acquire_rabbitmq_info
+ begin
+ rabbitmq_info = CarrotTop.new(
+ host: config[:host],
+ port: config[:port],
+ user: config[:user],
+ password: config[:password],
+ ssl: config[:ssl]
+ )
+ rescue
+ warning 'could not get rabbitmq info'
+ end
+ rabbitmq_info
+ end
+
+ def run
+ @crit = []
+ @warn = []
+ rabbitmq = acquire_rabbitmq_info
+ queues = rabbitmq.queues
+ config[:queue].each do |q|
+ unless queues.map { |hash| hash['name'] }.include? q
+ @warn << "Queue #{ q } not available"
+ next
+ end
+ queues.each do |queue|
+ next unless queue['name'] == q
+ total = queue['messages']
+ total = 0 if total.nil?
+ message "#{total}"
+ @crit << "#{ q }:#{ total }" if total > config[:critical].to_i
+ @warn << "#{ q }:#{ total }" if total > config[:warn].to_i
+ end
+ end
+ if @crit.empty? && @warn.empty?
+ ok
+ elsif !(@crit.empty?)
+ critical "critical: #{ @crit } warning: #{ @warn }"
+ elsif !(@warn.empty?)
+ warning "critical: #{ @crit } warning: #{ @warn }"
+ end
+ end
+end
diff --git a/sensu/files/checks/check_contrail_analytics.sh b/sensu/files/checks/check_contrail_analytics.sh
new file mode 100755
index 0000000..3976818
--- /dev/null
+++ b/sensu/files/checks/check_contrail_analytics.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-analytics services status on control nodes
+
+service=analytics
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#contrail-$service-api}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_contrail_config.sh b/sensu/files/checks/check_contrail_config.sh
new file mode 100644
index 0000000..a87e539
--- /dev/null
+++ b/sensu/files/checks/check_contrail_config.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-config services status on control nodes
+
+service=config
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#contrail-$service-nodemgr}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_contrail_control.sh b/sensu/files/checks/check_contrail_control.sh
new file mode 100644
index 0000000..11573a4
--- /dev/null
+++ b/sensu/files/checks/check_contrail_control.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-control services status on control nodes
+
+service=control
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#contrail-$service}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_contrail_database.sh b/sensu/files/checks/check_contrail_database.sh
new file mode 100644
index 0000000..c27df5e
--- /dev/null
+++ b/sensu/files/checks/check_contrail_database.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-database services status on control nodes
+
+service=contrail-database
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#$service}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_contrail_support_service.sh b/sensu/files/checks/check_contrail_support_service.sh
new file mode 100644
index 0000000..babec63
--- /dev/null
+++ b/sensu/files/checks/check_contrail_support_service.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-support-service services status on control nodes
+
+service=support_service
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#rabbitmq-server}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_contrail_webui.sh b/sensu/files/checks/check_contrail_webui.sh
new file mode 100644
index 0000000..2e69783
--- /dev/null
+++ b/sensu/files/checks/check_contrail_webui.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# check contrail-web ui services status on control nodes
+
+service=webui
+
+# collect every word of the status output so the service and state checks below see all lines
+contrail_status=( $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status) )
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#contrail-$service}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $LIST
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_fibrealliance.sh b/sensu/files/checks/check_fibrealliance.sh
new file mode 100644
index 0000000..8d25c95
--- /dev/null
+++ b/sensu/files/checks/check_fibrealliance.sh
@@ -0,0 +1,154 @@
+#!/bin/sh
+
+# Version 0.0.3 2010-08-18
+# Verify that the sensors check returns data. If not, return unknown to nagios.
+
+# Version 0.0.2 2010-05-11
+# Ulric Eriksson <ulric.eriksson@dgc.se>
+
+BASEOID=.1.3.6.1.3.94
+SYSTEMOID=$BASEOID.1.6
+connUnitStateOID=$SYSTEMOID.1.5
+# 1 = unknown, 2 = online, 3 = diag/offline
+connUnitStatusOID=$SYSTEMOID.1.6
+# 3 = OK, 4 = warning, 5 = failed
+connUnitProductOID=$SYSTEMOID.1.7
+# e.g. "QLogic SANbox2 FC Switch"
+connUnitSnOID=$SYSTEMOID.1.8
+# chassis serial number
+connUnitNumSensorsOID=$SYSTEMOID.1.14
+# number of sensors in connUnitSensorTable
+connUnitNameOID=$SYSTEMOID.1.20
+# symbolic name
+connUnitContactOID=$SYSTEMOID.1.23
+connUnitLocationOID=$SYSTEMOID.1.24
+
+SENSOROID=$BASEOID.1.8
+connUnitSensorIndexOID=$SENSOROID.1.2
+connUnitSensorNameOID=$SENSOROID.1.3
+# textual id of sensor
+connUnitSensorStatusOID=$SENSOROID.1.4
+# 1 = unknown, 2 = other, 3 = ok, 4 = warning, 5 = failed
+connUnitSensorMessageOID=$SENSOROID.1.6
+# textual status message
+
+PORTOID=$BASEOID.1.10
+connUnitPortUnitIdOID=$PORTOID.1.1
+connUnitPortIndexOID=$PORTOID.1.2
+connUnitPortTypeOID=$PORTOID.1.3
+connUnitPortStateOID=$PORTOID.1.6
+# user selected state
+# 1 = unknown, 2 = online, 3 = offline, 4 = bypassed, 5 = diagnostics
+connUnitPortStatusOID=$PORTOID.1.7
+# actual status
+# 1 = unknown, 2 = unused, 3 = ready, 4 = warning, 5 = failure
+# 6 = notparticipating, 7 = initializing, 8 = bypass, 9 = ols, 10 = other
+# Always returns 2, so this is utterly useless
+connUnitPortSpeedOID=$PORTOID.1.15
+# port speed in kilobytes per second
+
+usage()
+{
+ echo "Usage: $0 -H host -C community -T status|sensors"
+ exit 0
+}
+
+
+get_system()
+{
+ echo "$SYSTEM"|grep "^$1."|head -1|sed -e 's,^.*: ,,'
+}
+
+get_sensor()
+{
+ echo "$SENSOR"|grep "^$2.*$1 = "|head -1|sed -e 's,^.*: ,,'
+}
+
+get_port()
+{
+ echo "$PORT"|grep "^$2.*$1 = "|head -1|sed -e 's,^.*: ,,'
+}
+
+if test "$1" = -h; then
+ usage
+fi
+
+while getopts "H:C:T:" o; do
+ case "$o" in
+ H )
+ HOST="$OPTARG"
+ ;;
+ C )
+ COMMUNITY="$OPTARG"
+ ;;
+ T )
+ TEST="$OPTARG"
+ ;;
+ * )
+ usage
+ ;;
+ esac
+done
+
+RESULT=
+STATUS=0 # OK
+
+case "$TEST" in
+sensors )
+ SENSOR=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $SENSOROID`
+ # Figure out which sensor indexes we have
+ connUnitSensorIndex=`echo "$SENSOR"|
+ grep -F "$connUnitSensorIndexOID."|
+ sed -e 's,^.*: ,,'`
+ for i in $connUnitSensorIndex; do
+ connUnitSensorName=`get_sensor $i $connUnitSensorNameOID`
+ connUnitSensorStatus=`get_sensor $i $connUnitSensorStatusOID`
+ connUnitSensorMessage=`get_sensor $i $connUnitSensorMessageOID`
+ RESULT="$RESULT$connUnitSensorName = $connUnitSensorMessage
+"
+ if test "$connUnitSensorStatus" != 3; then
+ STATUS=2 # Critical
+ fi
+ done
+ if test -z "$SENSOR"; then
+ STATUS=3 # Unknown
+ fi
+ ;;
+status )
+ SYSTEM=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $SYSTEMOID`
+ connUnitStatus=`get_system $connUnitStatusOID`
+ connUnitProduct=`get_system $connUnitProductOID`
+ connUnitSn=`get_system $connUnitSnOID`
+ case "$connUnitStatus" in
+ 3 )
+ RESULT="Overall unit status: OK"
+ ;;
+ 4 )
+ RESULT="Overall unit status: Warning"
+ STATUS=1
+ ;;
+ 5 )
+ RESULT="Overall unit status: Failed"
+ STATUS=2
+ ;;
+ * )
+ RESULT="Overall unit status: Unknown"
+ STATUS=3
+ ;;
+ esac
+ if test ! -z "$connUnitProduct"; then
+ RESULT="$RESULT
+Product: $connUnitProduct"
+ fi
+ if test ! -z "$connUnitSn"; then
+ RESULT="$RESULT
+Serial number: $connUnitSn"
+ fi
+ ;;
+* )
+ usage
+ ;;
+esac
+
+echo "$RESULT"
+exit $STATUS
diff --git a/sensu/files/checks/check_fqdn.py b/sensu/files/checks/check_fqdn.py
new file mode 100644
index 0000000..4b04b8f
--- /dev/null
+++ b/sensu/files/checks/check_fqdn.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+"""
+nagios plugin to monitor fqdn validity
+--------------------------------------
+
+usage
+
+::
+
+ check_fqdn.py -n node01 -f node01.cluster.domain.com
+
+"""
+from optparse import OptionParser
+import os
+import subprocess
+
+#nagios return codes
+UNKNOWN = -1
+OK = 0
+WARNING = 1
+CRITICAL = 2
+
+HOSTNAME_CHECK='hostname'
+
+#supervisor states, map state to desired warning level
+
+def get_status(hostname, fqdn):
+
+ ok = []
+ crit = []
+ warn = []
+
+ lines = subprocess.Popen([HOSTNAME_CHECK], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0]
+ hostname_output = lines.splitlines()[0]
+
+ if hostname == hostname_output:
+ ok.append('Hostname is OK. ')
+ else:
+ crit.append('Hostname %s does not match desired %s. ' % (hostname_output, hostname))
+
+ try:
+ lines = subprocess.Popen([HOSTNAME_CHECK, '-f'], stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[0]
+ fqdn_output = lines.splitlines()[0]
+
+ if fqdn == fqdn_output:
+ ok.append('FQDN is OK. ')
+ else:
+ crit.append('FQDN %s does not match desired %s. ' % (fqdn_output, fqdn))
+ except:
+ crit.append('FQDN is not desired %s. ' % fqdn)
+
+ status = OK
+ prepend = "OK"
+
+ if len(warn) > 0:
+ status = WARNING
+ prepend = "WARNING"
+
+ if len(crit) > 0:
+ status = CRITICAL
+ prepend = "CRITICAL"
+
+ return ("%s - %s%s%s" %(prepend, "".join(crit), "".join(warn), "".join(ok)), status)
+
+parser = OptionParser()
+parser.add_option('-n', '--hostname', dest='hostname',
+ help="Server hostname")
+parser.add_option('-f', '--fqdn', dest='fqdn')
+
+options, args = parser.parse_args()
+
+output = get_status(options.hostname, options.fqdn)
+print output[0]
+raise SystemExit, output[1]
diff --git a/sensu/files/checks/check_galera_cluster b/sensu/files/checks/check_galera_cluster
new file mode 100644
index 0000000..89b5d95
--- /dev/null
+++ b/sensu/files/checks/check_galera_cluster
@@ -0,0 +1,159 @@
+#!/bin/bash
+PROGNAME=`basename $0`
+VERSION="Version 1.0,"
+AUTHOR="Guillaume Coré <g@fridim.org>"
+
+ST_OK=0
+ST_WR=1
+ST_CR=2
+ST_UK=3
+
+print_version() {
+ echo "$VERSION $AUTHOR"
+}
+
+print_help() {
+ print_version $PROGNAME $VERSION
+ echo ""
+ echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status."
+ echo ""
+ echo "$PROGNAME -u USER -p PASSWORD [-H HOST] [-P PORT] [-w SIZE] [-c SIZE] [-f FLOAT] [-0]"
+ echo ""
+ echo "Options:"
+ echo " u)"
+ echo " MySQL user."
+ echo " p)"
+ echo " MySQL password."
+ echo " H)"
+ echo " MySQL host. Default is localhost."
+ echo " P)"
+ echo " MySQL port. Default is 3306."
+ echo " w)"
+ echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
+ echo " c)"
+ echo " Sets minimum number of nodes in the cluster when CRITICAL is raised. (default is 2)."
+ echo " f)"
+ echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
+ echo " 0)"
+ echo " Rise CRITICAL if the node is not primary"
+ exit $ST_UK
+}
+
+# default values
+crit=2
+port='3306'
+mysqlhost='localhost'
+fcp=0.1
+
+while getopts "hvu:p:H:P:w:c:f:0" OPTION; do
+ case $OPTION in
+ h)
+ print_help
+ exit $ST_UK
+ ;;
+ v)
+ print_version $PROGNAME $VERSION
+ exit $ST_UK
+ ;;
+ u)
+ mysqluser=$OPTARG
+ ;;
+ p)
+ password=$OPTARG
+ ;;
+ H)
+ mysqlhost=$OPTARG
+ ;;
+ P)
+ port=$OPTARG
+ ;;
+ w)
+ warn=$OPTARG
+ ;;
+ c)
+ crit=$OPTARG
+ ;;
+ f)
+ fcp=$OPTARG
+ ;;
+ 0)
+ primary='TRUE'
+ ;;
+ ?)
+ echo "Unknown argument: $1"
+ print_help
+ exit $ST_UK
+ ;;
+ esac
+done
+
+if [ -z "$warn" ]; then
+ warn=$crit
+fi
+
+# MANDATORY args
+if [ -z "$mysqluser" ]; then
+ echo "argument -u missing"
+ print_help
+ exit $ST_UK
+fi
+
+if [ -z "$password" ]; then
+ echo "argument -p missing"
+ print_help
+ exit $ST_UK
+fi
+
+r1=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2) # 3 (GALERA_CLUSTER_SIZE)
+r2=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
+r3=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
+r4=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
+r5=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
+r6=$(mysql -h$mysqlhost -P$port -u$mysqluser -p$password -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
+
+if [ -z "$r3" ]; then
+ echo "UNKNOWN: wsrep_flow_control_paused is empty"
+ ST_FINAL=$ST_UK
+fi
+
+if [ $(echo "$r3 > $fcp" | bc) = 1 ]; then
+ echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
+ ST_FINAL=$ST_CR
+fi
+
+if [ "$primary" = 'TRUE' ]; then
+ if [ "$r2" != 'Primary' ]; then
+ echo "CRITICAL: node is not primary"
+ ST_FINAL=$ST_CR
+ fi
+fi
+
+if [ "$r4" != 'ON' ]; then
+ echo "CRITICAL: node is not ready"
+ ST_FINAL=$ST_CR
+fi
+
+if [ "$r5" != 'ON' ]; then
+ echo "CRITICAL: node is not connected"
+ ST_FINAL=$ST_CR
+fi
+
+if [ "$r6" != 'Synced' ]; then
+ echo "CRITICAL: node is not synced"
+ ST_FINAL=$ST_CR
+fi
+
+if [ $r1 -gt $warn ]; then
+ echo "OK: number of NODES = $r1"
+ ST_FINAL=${ST_FINAL-$ST_OK}
+elif [ $r1 -le $crit ]; then
+ echo "CRITICAL: number of NODES = $r1"
+ ST_FINAL=$ST_CR
+elif [ $r1 -le $warn ]; then
+ echo "WARNING: number of NODES = $r1"
+ ST_FINAL=${ST_FINAL-$ST_WR}
+else
+ exit $ST_UK
+fi
+
+exit $ST_FINAL
diff --git a/sensu/files/checks/check_glusterfs b/sensu/files/checks/check_glusterfs
new file mode 100755
index 0000000..e3a72f6
--- /dev/null
+++ b/sensu/files/checks/check_glusterfs
@@ -0,0 +1,123 @@
+#!/bin/bash
+
+PATH=/sbin:/bin:/usr/sbin:/usr/bin
+
+PROGNAME=$(basename -- $0)
+
+# parse command line
+usage () {
+ echo ""
+ echo "USAGE: "
+ echo " $PROGNAME -v VOLUME -n BRICKS [-w GB -c GB]"
+ echo " -n BRICKS: number of bricks"
+ echo " -w and -c values in GB"
+ exit $STATE_UNKNOWN
+}
+
+while getopts "v:n:w:c:" opt; do
+ case $opt in
+ v) VOLUME=${OPTARG} ;;
+ n) BRICKS=${OPTARG} ;;
+ w) WARN=${OPTARG} ;;
+ c) CRIT=${OPTARG} ;;
+ *) usage ;;
+ esac
+done
+
+if [ -z "${VOLUME}" -o -z "${BRICKS}" ]; then
+ usage
+fi
+
+Exit () {
+ echo "$1: ${2:0}"
+ status=STATE_$1
+ case "$status" in
+ STATE_OK) EXITVAL=0 ;;
+ STATE_WARNING) EXITVAL=1 ;;
+ STATE_CRITICAL|STATE_UNKNOWN) EXITVAL=2 ;;
+ *) EXITVAL=2 ;;
+ esac
+ exit $EXITVAL
+}
+
+# check for commands
+for cmd in basename bc awk sudo pidof gluster; do
+ if ! type -p "$cmd" >/dev/null; then
+ Exit UNKNOWN "$cmd not found"
+ fi
+done
+
+# check for glusterd (management daemon)
+if ! pidof glusterd &>/dev/null; then
+ Exit CRITICAL "glusterd management daemon not running"
+fi
+
+# check for glusterfsd (brick daemon)
+if ! pidof glusterfsd &>/dev/null; then
+ Exit CRITICAL "glusterfsd brick daemon not running"
+fi
+
+# get volume status
+bricksfound=0
+freegb=9999999
+shopt -s nullglob
+while read -r line; do
+ field=($(echo $line))
+ case ${field[0]} in
+ Brick)
+ brick=${field[@]:2}
+ ;;
+ Disk)
+ key=${field[@]:0:3}
+ if [ "${key}" = "Disk Space Free" ]; then
+ freeunit=${field[@]:4}
+ free=${freeunit:0:-2}
+ unit=${freeunit#$free}
+ if [ "$unit" != "GB" ]; then
+ Exit UNKNOWN "unknown disk space size $freeunit"
+ fi
+ free=$(echo "${free} / 1" | bc -q)
+ if [ $free -lt $freegb ]; then
+ freegb=$free
+ fi
+ fi
+ ;;
+ Online)
+ online=${field[@]:2}
+ if [ "${online}" = "Y" ]; then
+ let $((bricksfound++))
+ else
+ errors=("${errors[@]}" "$brick offline")
+ fi
+ ;;
+ esac
+done < <(sudo gluster volume status ${VOLUME} detail)
+
+if [ $bricksfound -eq 0 ]; then
+ Exit CRITICAL "no bricks found"
+elif [ $bricksfound -lt $BRICKS ]; then
+ errors=("${errors[@]}" "found $bricksfound bricks, expected $BRICKS ")
+fi
+
+if [ -n "$CRIT" -a -n "$WARN" ]; then
+ if [ $CRIT -ge $WARN ]; then
+ Exit UNKNOWN "critical threshold below warning"
+ elif [ $freegb -lt $CRIT ]; then
+ Exit CRITICAL "free space ${freegb}GB"
+ elif [ $freegb -lt $WARN ]; then
+ errors=("${errors[@]}" "free space ${freegb}GB")
+ fi
+fi
+
+# exit with warning if errors
+if [ -n "$errors" ]; then
+ sep='; '
+ msg=$(printf "${sep}%s" "${errors[@]}")
+ msg=${msg:${#sep}}
+
+ Exit WARNING "${msg}"
+fi
+
+# exit with no errors
+Exit OK "${bricksfound} bricks; free space ${freegb}GB"
+
diff --git a/sensu/files/checks/check_http_json.py b/sensu/files/checks/check_http_json.py
new file mode 100644
index 0000000..b922f0b
--- /dev/null
+++ b/sensu/files/checks/check_http_json.py
@@ -0,0 +1,208 @@
+#!/usr/bin/python
+
+"""
+Check HTTP JSON Nagios Plugin
+
+Generic Nagios plugin which checks json values from a given endpoint against argument specified rules
+and determines the status and performance data for that service.
+"""
+
+import httplib, urllib, urllib2
+import json
+import argparse
+from pprint import pprint
+
+
+class NagiosHelper:
+ """Help with Nagios specific status string formatting."""
+ code = 0
+ message_prefixes = {0: 'OK', 1: 'WARNING', 2: 'CRITICAL', 3: 'UNKNOWN'}
+ message_text = ''
+ performance_data = ''
+
+ def getMessage(self):
+ """Build a status-prefixed message with optional performance data generated externally"""
+ text = "%s" % self.message_prefixes[self.code]
+ if self.message_text:
+ text += ": %s" % self.message_text
+ if self.performance_data:
+ text += "|%s" % self.performance_data
+ return text
+
+ def setCodeAndMessage(self, code, text):
+ self.code = code
+ self.message_text = text
+
+ def ok(self, text): self.setCodeAndMessage(0, text)
+ def warning(self, text): self.setCodeAndMessage(1, text)
+ def critical(self, text): self.setCodeAndMessage(2, text)
+ def unknown(self, text): self.setCodeAndMessage(3, text)
+
+class JsonHelper:
+ """Perform simple comparison operations against values in a given JSON dict"""
+ def __init__(self, json_data):
+ self.data = json_data
+
+ def equals(self, key, value): return self.exists(key) and str(self.get(key)) == value
+ def lte(self, key, value): return self.exists(key) and str(self.get(key)) <= value
+ def gte(self, key, value): return self.exists(key) and str(self.get(key)) >= value
+ def exists(self, key): return (self.get(key) != (None, 'not_found'))
+ def get(self, key, temp_data=''):
+ """Can navigate nested json keys with a dot format (Element.Key.NestedKey). Returns (None, 'not_found') if not found"""
+ if temp_data:
+ data = temp_data
+ else:
+ data = self.data
+
+ if '.' in key:
+ return self.get(key[key.find('.') + 1:], data[key[:key.find('.')]])
+ else:
+ if key in data:
+ return data[key]
+ else:
+ return (None, 'not_found')
+
+class JsonRuleProcessor:
+ """Perform checks and gather values from a JSON dict given rules and metrics definitions"""
+ def __init__(self, json_data, rules_args):
+ self.data = json_data
+ self.rules = rules_args
+
+ def isAlive(self):
+ """Return a tuple with liveness and reason for not liveness given existence, equality, and comparison rules"""
+ reason = ''
+ helper = JsonHelper(self.data)
+
+ if self.rules.key_list != None:
+ for k in self.rules.key_list:
+ if (helper.exists(k) == False):
+ reason += " Key %s did not exist." % k
+
+ if self.rules.key_value_list != None:
+ for kv in self.rules.key_value_list:
+ k, v = kv.split(',')
+ if (helper.equals(k, v) == False):
+ reason += " Value %s for key %s did not match." % (v, k)
+
+ if self.rules.key_lte_list != None:
+ for kv in self.rules.key_lte_list:
+ k, v = kv.split(',')
+ if (helper.lte(k, v) == False):
+ reason += " Value %s was not less than or equal to value for key %s." % (v, k)
+
+ if self.rules.key_gte_list != None:
+ for kv in self.rules.key_gte_list:
+ k, v = kv.split(',')
+ if (helper.gte(k, v) == False):
+ reason += " Value %s was not greater than or equal to value for key %s." % (v, k)
+
+ is_alive = (reason == '')
+
+ return (is_alive, reason)
+
+ def getMetrics(self):
+ """Return a Nagios specific performance metrics string given keys and parameter definitions"""
+ metrics = ''
+ helper = JsonHelper(self.data)
+
+ if self.rules.metric_list != None:
+ for metric in self.rules.metric_list:
+ key = metric
+ minimum = maximum = warn_range = crit_range = 0
+ uom = ''
+
+            vals = metric.split(',')
+
+ if len(vals) == 2:
+ key,uom = vals
+ if len(vals) == 4:
+ key,uom,minimum,maximum = vals
+ if len(vals) == 6:
+ key,uom,minimum,maximum,warn_range,crit_range = vals
+
+ if helper.exists(key):
+ metrics += "'%s'=%s" % (key, helper.get(key))
+ if uom: metrics += uom
+ metrics += ";%s" % minimum
+ metrics += ";%s" % maximum
+ if warn_range: metrics += ";%s" % warn_range
+ if crit_range: metrics += ";%s" % crit_range
+
+ metrics += ' '
+
+
+ return "%s" % metrics
+
+def parseArgs():
+ parser = argparse.ArgumentParser(description=
+ 'Nagios plugin which checks json values from a given endpoint against argument specified rules\
+ and determines the status and performance data for that service')
+
+ parser.add_argument('-H', '--host', dest='host', required=True, help='Host.')
+ parser.add_argument('-p', '--path', dest='path', help='Path.')
+ parser.add_argument('-e', '--key_exists', dest='key_list', nargs='*',
+ help='Checks existence of these keys to determine status.')
+ parser.add_argument('-q', '--key_equals', dest='key_value_list', nargs='*',
+ help='Checks equality of these keys and values (key,value key2,value2) to determine status.')
+ parser.add_argument('-l', '--key_lte', dest='key_lte_list', nargs='*',
+ help='Checks that these keys and values (key,value key2,value2) are less than or equal to\
+ the returned json value to determine status.')
+ parser.add_argument('-g', '--key_gte', dest='key_gte_list', nargs='*',
+ help='Checks that these keys and values (key,value key2,value2) are greater than or equal to\
+ the returned json value to determine status.')
+ parser.add_argument('-m', '--key_metric', dest='metric_list', nargs='*',
+ help='Gathers the values of these keys (key,UnitOfMeasure,Min,Max,WarnRange,CriticalRange) for Nagios performance data.\
+ More information about Range format and units of measure for nagios can be found at https://nagios-plugins.org/doc/guidelines.html\
+ Additional formats for this parameter are: (key), (key,UnitOfMeasure), (key,UnitOfMeasure,Min,Max).')
+ parser.add_argument('-d', '--debug', action='store_true', help='Debug mode.')
+
+ return parser.parse_args()
+
+def debugPrint(debug_flag, message, pretty_flag=False):
+ if debug_flag:
+ if pretty_flag:
+ pprint(message)
+ else:
+ print message
+
+"""Program entry point"""
+if __name__ == "__main__":
+ args = parseArgs()
+ nagios = NagiosHelper()
+
+ url = "http://%s" % args.host
+ if args.path: url += "/%s" % args.path
+ debugPrint(args.debug, "url:%s" % url)
+
+ # Attempt to reach the endpoint
+ try:
+ req = urllib2.Request(url)
+ response = urllib2.urlopen(req)
+ except urllib2.HTTPError as e:
+ nagios.unknown("HTTPError[%s], url:%s" % (str(e.code), url))
+ except urllib2.URLError as e:
+ nagios.critical("URLError[%s], url:%s" % (str(e.reason), url))
+ else:
+ jsondata = response.read()
+ data = json.loads(jsondata)
+
+ debugPrint(args.debug, 'json:')
+ debugPrint(args.debug, data, True)
+
+ # Apply rules to returned JSON data
+ processor = JsonRuleProcessor(data, args)
+ is_alive, reason = processor.isAlive()
+
+ if is_alive:
+ # Rules all passed, attempt to get performance data
+ nagios.performance_data = processor.getMetrics()
+ nagios.ok("Status OK.")
+ else:
+ nagios.warning("Status check failed, reason:%s" % reason)
+
+ # Print Nagios specific string and exit appropriately
+ print nagios.getMessage()
+ exit(nagios.code)
diff --git a/sensu/files/checks/check_ibm_bladecenter.py b/sensu/files/checks/check_ibm_bladecenter.py
new file mode 100644
index 0000000..ef7cb3c
--- /dev/null
+++ b/sensu/files/checks/check_ibm_bladecenter.py
@@ -0,0 +1,591 @@
+#!/usr/bin/python
+#
+# Copyright 2010, Pall Sigurdsson <palli@opensource.is>
+#
+# This script is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This script is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+# About this script
+#
+# This script will check the status of a remote IBM Bladecenter via SNMP.
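+#
+# Example invocation (hypothetical hostname and SNMP community string):
+#   check_ibm_bladecenter.py -H bladecenter01 -C public -v 2c --mode system-health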
+
+
+# No real need to change anything below here
+version = "1.1.2"
+ok = 0
+warning = 1
+critical = 2
+unknown = 3
+not_present = -1
+exit_status = -1
+
+
+state = {}
+state[not_present] = "Not Present"
+state[ok] = "OK"
+state[warning] = "Warning"
+state[critical] = "Critical"
+state[unknown] = "Unknown"
+
+
+longserviceoutput = "\n"
+perfdata = ""
+summary = ""
+sudo = False
+
+
+from sys import exit
+from sys import argv
+from os import getenv, putenv, environ
+import subprocess
+
+
+# Parse some Arguments
+from optparse import OptionParser
+parser = OptionParser()
+parser.add_option("-m", "--mode", dest="mode",
+ help="Which check mode is in use (powermodules,system-health,temperature,chassis-status,bladehealth,blowers,switchmodules)")
+parser.add_option("-H", "--host", dest="host",
+ help="Hostname or IP address of the host to check")
+parser.add_option("-w", "--warning", dest="warning_threshold",
+ help="Warning threshold", type="int", default=None)
+parser.add_option("-c", "--critical", type="int", dest="critical_threshold",
+ help="Critical threshold", default=None)
+parser.add_option("-e", "--exclude", dest="exclude",
+ help="Exclude specific object", default=None)
+parser.add_option("-v", "--snmp_version", dest="snmp_version",
+ help="SNMP Version to use (1, 2c or 3)", default="1")
+parser.add_option("-u", "--snmp_username", dest="snmp_username",
+ help="SNMP username (only with SNMP v3)", default=None)
+parser.add_option("-C", "--snmp_community", dest="snmp_community",
+ help="SNMP Community (only with SNMP v1|v2c)", default=None)
+parser.add_option("-p", "--snmp_password", dest="snmp_password",
+ help="SNMP password (only with SNMP v3)", default=None)
+parser.add_option("-l", "--snmp_security_level", dest="snmp_seclevel",
+ help="SNMP security level (only with SNMP v3) (noAuthNoPriv|authNoPriv|authPriv)", default=None)
+parser.add_option("-t", "--snmp_timeout", dest="snmp_timeout",
+ help="Timeout in seconds for SNMP", default=10)
+parser.add_option("-d", "--debug", dest="debug",
+                  help="Enable debugging (for troubleshooting)", action="store_true", default=False)
+
+(opts, args) = parser.parse_args()
+
+
+if opts.host is None:
+ parser.error("Hostname (-H) is required.")
+if opts.mode is None:
+ parser.error("Mode (--mode) is required.")
+
+snmp_options = ""
+
+
+def set_snmp_options():
+ global snmp_options
+ if opts.snmp_version is not None:
+ snmp_options = snmp_options + " -v%s" % opts.snmp_version
+ if opts.snmp_version == "3":
+ if opts.snmp_username is None:
+ parser.error("--snmp_username required with --snmp_version=3")
+ if opts.snmp_seclevel is None:
+ parser.error(
+ "--snmp_security_level required with --snmp_version=3")
+ if opts.snmp_password is None:
+ parser.error("--snmp_password required with --snmp_version=3")
+ snmp_options = snmp_options + " -u %s -l %s -A %s " % (
+ opts.snmp_username, opts.snmp_seclevel, opts.snmp_password)
+ else:
+ if opts.snmp_community is None:
+ parser.error(
+ "--snmp_community is required with --snmp_version=1|2c")
+ snmp_options = snmp_options + " -c %s " % opts.snmp_community
+ snmp_options += " -t %s " % (opts.snmp_timeout)
+
+
+def error(errortext):
+ print "* Error: %s" % errortext
+ exit(unknown)
+
+
+def debug(debugtext):
+ if opts.debug:
+ print debugtext
+
+
+def nagios_status(newStatus):
+ global exit_status
+ exit_status = max(exit_status, newStatus)
+ return exit_status
+
+
+def runCommand(command):
+ '''runCommand: Runs command from the shell prompt. Exit Nagios style if unsuccessful'''
+ debug("Executing: %s" % command)
+ proc = subprocess.Popen(
+ command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE,)
+ stdout, stderr = proc.communicate('through stdin to stdout')
+ if proc.returncode > 0:
+ print "Error %s: %s\n command was: '%s'" % (proc.returncode, stderr.strip(), command)
+ debug("results: %s" % (stdout.strip()))
+        if proc.returncode == 127:  # File not found, let's print the path
+ path = getenv("PATH")
+ print "Check if your path is correct %s" % (path)
+ if stderr.find('Password:') == 0 and command.find('sudo') == 0:
+ print "Check if user is in the sudoers file"
+ if stderr.find('sorry, you must have a tty to run sudo') == 0 and command.find('sudo') == 0:
+ print "Please remove 'requiretty' from /etc/sudoers"
+ exit(unknown)
+ else:
+ return stdout
+
+
+def end():
+ global summary
+ global longserviceoutput
+ global perfdata
+ global exit_status
+ print "%s - %s | %s" % (state[exit_status], summary, perfdata)
+ print longserviceoutput
+ if exit_status < 0:
+ exit_status = unknown
+ exit(exit_status)
+
+
+def add_perfdata(text):
+ global perfdata
+ text = text.strip()
+ perfdata = perfdata + " %s " % (text)
+
+
+def add_long(text):
+ global longserviceoutput
+ longserviceoutput = longserviceoutput + text + '\n'
+
+
+def add_summary(text):
+ global summary
+ summary = summary + text
+
+
+def set_path(path):
+ current_path = getenv('PATH')
+    if current_path.find('C:\\') > -1:  # We are on Windows
+ if path == '':
+ pass
+ else:
+ path = ';' + path
+ else: # Unix/Linux, etc
+ if path == '':
+ path = ":/usr/sbin"
+ else:
+ path = ':' + path
+ current_path = "%s%s" % (current_path, path)
+ environ['PATH'] = current_path
+
+
+def snmpget(oid):
+ snmpgetcommand = "snmpget %s %s %s" % (snmp_options, opts.host, oid)
+ output = runCommand(snmpgetcommand)
+ oid, result = output.strip().split(' = ', 1)
+ resultType, resultValue = result.split(': ', 1)
+    if resultType == 'STRING':  # strip the quotes off the string
+ resultValue = resultValue[1:-1]
+ return resultValue
+
+# snmpwalk -v3 -u v3get mgmt-rek-proxy-p02 -A proxy2011 -l authNoPriv
+# 1.3.6.1.4.1.15497
+
+
+def snmpwalk(base_oid):
+ snmpwalkcommand = "snmpwalk %s %s %s" % (snmp_options, opts.host, base_oid)
+    output = runCommand(snmpwalkcommand)
+ return output
+
+
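+# getTable() walks an SNMP subtree and returns a dict of dicts: the outer key is
+# the last component of each returned OID and the inner key the one before it,
+# so the check_* functions below can address individual cells of a MIB table.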
+def getTable(base_oid):
+ myTable = {}
+ output = snmpwalk(base_oid)
+ for line in output.split('\n'):
+ tmp = line.strip().split(' = ', 1)
+ if len(tmp) == 2:
+ oid, result = tmp
+ else:
+ continue
+ tmp = result.split(': ', 1)
+ if len(tmp) > 1:
+ resultType, resultValue = tmp[0], tmp[1]
+ else:
+ resultType = None
+ resultValue = tmp[0]
+        if resultType == 'STRING':  # strip the quotes off the string
+ resultValue = resultValue[1:-1]
+ index = oid.strip().split('.')
+ column = int(index.pop())
+ row = int(index.pop())
+ if not myTable.has_key(column):
+ myTable[column] = {}
+ myTable[column][row] = resultValue
+ return myTable
+
+
+def check_powermodules():
+ powermodules = getTable('1.3.6.1.4.1.2.3.51.2.2.4')
+ index = 1
+ exists = 2
+ status = 3
+ details = 4
+ num_ok = 0
+ num_no = 0
+ for i in powermodules.values():
+ myIndex = i[index]
+ myStatus = i[status]
+ myDetails = i[details]
+ myExists = i[exists]
+ if myIndex == opts.exclude:
+ continue
+ if myExists == "0":
+ num_no = num_no + 1
+ else:
+ if myStatus != "1":
+ nagios_status(warning)
+ add_summary('Powermodule "%s" status "%s". %s. ' %
+ (myIndex, myStatus, myDetails))
+ else:
+ num_ok = num_ok + 1
+ add_long('Powersupply "%s" status "%s". %s. ' %
+ (myIndex, myStatus, myDetails))
+ add_summary("%s out of %s powermodules are healthy" %
+ (num_ok, len(powermodules)))
+ add_perfdata("'Number of powermodules'=%s" %
+ (len(powermodules) - num_no))
+
+ nagios_status(ok)
+
+
+def check_switchmodules():
+ switchmodules = getTable("1.3.6.1.4.1.2.3.51.2.22.3.1.1")
+ # The following oid is undocumented, but contains some useful extra info
+ try:
+ extrainfo = getTable("1.3.6.1.4.1.2.3.51.2.22.3.1.7").values()
+ except:
+ extrainfo = []
+ for module in switchmodules.values():
+ myIndex = module[1]
+ healthstate = module[15]
+ resultavailable = module[3]
+ resultvalue = module[4]
+ enabledisable = module[6]
+ if resultavailable == "1":
+            # this module is installed
+ if healthstate == "1":
+ nagios_status(ok)
+ add_long("Module%s health good.\n post=%s" %
+ (myIndex, resultvalue))
+ else:
+ nagios_status(warning)
+ add_long("Module%s health bad(%s).\n post=%s" %
+ (myIndex, healthstate, resultvalue))
+ add_summary("Problem with Module %s. " % (myIndex))
+ if len(extrainfo) > int(myIndex):
+ try:
+ myExtraInfo = extrainfo[int(myIndex) - 1]
+ module_type = myExtraInfo[22]
+ module_ip = myExtraInfo[6]
+ add_long(" type=%s ip=%s" % (module_type, module_ip))
+ except:
+ pass
+ if exit_status == ok:
+ add_summary("All switchmodules healthy")
+
+
+def check_blowers():
+ " Check blower status "
+ blowers = getTable("1.3.6.1.4.1.2.3.51.2.2.3")
+ # This mib only seems to support 2 blowers.
+ blower1speed = snmpget("1.3.6.1.4.1.2.3.51.2.2.3.1.0")
+ blower1state = snmpget("1.3.6.1.4.1.2.3.51.2.2.3.10.0")
+
+ blower2speed = snmpget("1.3.6.1.4.1.2.3.51.2.2.3.2.0")
+ blower2state = snmpget("1.3.6.1.4.1.2.3.51.2.2.3.11.0")
+
+ add_long("Blower 1 state=%s speed=%s" % (blower1state, blower1speed))
+ add_long("Blower 2 state=%s speed=%s" % (blower2state, blower2speed))
+ add_perfdata("blower1=%s" % (blower1speed.split(None, 1)[0]))
+ add_perfdata("blower2=%s" % (blower2speed.split(None, 1)[0]))
+ # Check blower 1
+ if blower1state == "1":
+ nagios_status(ok)
+ add_summary("Blower1 OK. ")
+ else:
+ add_summary("Blower1 NOT OK. ")
+ nagios_status(warning)
+
+ # Check blower 2
+ if blower2state == "1":
+ nagios_status(ok)
+ add_summary("Blower2 OK. ")
+ else:
+ add_summary("Blower2 NOT OK. ")
+ nagios_status(warning)
+
+ if blower1state != "1" and blower2state != "1":
+ nagios_status(critical)
+
+
+def check_chassis_status():
+ chassis = getTable('1.3.6.1.4.1.2.3.51.2.2.5.2')
+ oids = chassis.values()[0]
+ chassis_oid = {
+ 1: "bistSdram",
+ 10: "bistBootRomFlashImage",
+ 11: "bistEthernetPort1",
+ 113: "bistSwitchModulesCommunicating",
+ 12: "bistEthernetPort2",
+ 13: "bistInternalPCIBus",
+ 14: "bistExternalI2CDevices",
+ 15: "bistUSBController",
+ 16: "bistVideoCompressorBoard",
+ 17: "bistPrimaryBus",
+ 18: "bistInternalEthernetSwitch",
+ 2: "bistRs485Port1",
+ 3: "bistRs485Port2",
+ 33: "bistBladesInstalled",
+ 4: "bistNvram",
+ 49: "bistBladesCommunicating",
+ 6: "bistRtc",
+ 65: "bistBlowersInstalled",
+ 7: "bistLocalI2CBus",
+ 73: "bistBlowersFunctional",
+ 74: "bistMediaTrayInstalled",
+ 75: "bistMediaTrayCommunicating",
+ 8: "bistPrimaryMainAppFlashImage",
+ 81: "bistPowerModulesInstalled",
+ 89: "bistPowerModulesFunctional",
+ 9: "bistSecondaryMainAppFlashImage",
+ 97: "bistSwitchModulesInstalled",
+ }
+
+ # Check if all blades are working
+ bistBladesInstalled = 33
+ bistBlowersInstalled = 65
+ bistMediaTrayInstalled = 74
+ bistPowerModulesInstalled = 81
+ bistSwitchModulesInstalled = 97
+
+ bistSwitchModulesCommunicating = 113
+ bistBladesCommunicating = 49
+ bistMediaTrayCommunicating = 75
+ bistBlowersFunctional = 73
+ bistPowerModulesFunctional = 89
+
+ # Check Blade Communications
+ if not oids.has_key(bistBladesInstalled) or not oids.has_key(bistBladesCommunicating):
+ add_summary("Blades N/A. ")
+ elif oids[bistBladesInstalled] == oids[bistBladesCommunicating]:
+ nagios_status(ok)
+ add_summary("Blades OK. ")
+ else:
+ nagios_status(warning)
+ add_summary("Blades NOT OK. ")
+ # Check PowerModule Status
+ if not oids.has_key(bistPowerModulesFunctional) or not oids.has_key(bistPowerModulesInstalled):
+ add_summary("Powermodules N/A. ")
+ elif oids[bistPowerModulesFunctional] == oids[bistPowerModulesInstalled]:
+ nagios_status(ok)
+ add_summary("PowerModules OK. ")
+ else:
+ nagios_status(warning)
+ add_summary("PowerModules NOT OK. ")
+
+    # Check SwitchModule communications
+    if not oids.has_key(bistSwitchModulesCommunicating) or not oids.has_key(bistSwitchModulesInstalled):
+        add_summary("SwitchModules N/A. ")
+    elif oids[bistSwitchModulesCommunicating] == oids[bistSwitchModulesInstalled]:
+ nagios_status(ok)
+ add_summary("Switchmodules OK. ")
+ else:
+ nagios_status(warning)
+ add_summary("Switchmodules NOT OK. ")
+ # Check blower status
+ if not oids.has_key(bistBlowersInstalled) or not oids.has_key(bistBlowersFunctional):
+ add_summary("Blowers N/A. ")
+ elif oids[bistBlowersInstalled] == oids[bistBlowersFunctional]:
+ nagios_status(ok)
+ add_summary("Blowers OK. ")
+ else:
+ nagios_status(warning)
+ add_summary("Blowers NOT OK. ")
+ # Check Media Tray Status
+    if not oids.has_key(bistMediaTrayCommunicating) or not oids.has_key(bistMediaTrayInstalled):
+        add_summary("Media Trays N/A. ")
+    elif oids[bistMediaTrayCommunicating] == oids[bistMediaTrayInstalled]:
+        nagios_status(ok)
+        add_summary("Media Trays OK. ")
+ else:
+ nagios_status(warning)
+ add_summary("Media Trays NOT OK. ")
+
+    # status_oids: oids where 0 == ok
+ status_oids = (2, 3, 5, 7, 8, 9, 10, 11, 14, 18,
+ 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, )
+
+ add_long("Other Sensors: ")
+ sensor_status = ok
+ for oid in status_oids:
+ if not chassis_oid.has_key(oid):
+ continue
+ oidValue = oids[oid]
+ oidName = chassis_oid[oid]
+ if oidValue == "0":
+ friendly_status = "%s (ok)" % oidValue
+ else:
+ friendly_status = "%s (not ok)" % oidValue
+ nagios_status(warning)
+ sensor_status = warning
+            add_summary("%s is %s. " % (oidName, friendly_status))
+ add_long(" %s status: %s" % (oidName, friendly_status))
+ if sensor_status == ok:
+ add_summary("Other Sensors: OK. ")
+
+
+def check_bladehealth():
+ blades = getTable('1.3.6.1.4.1.2.3.51.2.22.1.5.2.1')
+ bladestate = getTable('1.3.6.1.4.1.2.3.51.2.22.1.5.1.1').values()
+
+ index, bladeid, severity, description = (1, 2, 3, 4)
+ good_blades = 0
+ total_blades = 0
+ for i, row in enumerate(blades.values()):
+ myIndex = row[index]
+ myBladeid = row[bladeid]
+ mySeverity = row[severity]
+ myDescription = row[description]
+ try:
+ myName = bladestate[i][6]
+ except:
+ myName = ""
+ if mySeverity == "(No severity)":
+ continue
+ add_long("blade%s (%s): %s %s" %
+ (myBladeid, myName, mySeverity, myDescription))
+ if opts.exclude:
+ if myDescription.find(opts.exclude) > -1:
+ continue
+ total_blades += 1
+ if mySeverity == 'Good':
+ nagios_status(ok)
+ good_blades += 1
+ else:
+ nagios_status(warning)
+ add_summary("blade%s (%s): %s %s. " %
+ (myBladeid, myName, mySeverity, myDescription))
+ if good_blades == total_blades:
+ add_summary("%s out of %s blades in Good health. " %
+ (good_blades, total_blades))
+ nagios_status(ok)
+ else:
+ nagios_status(warning)
+
+
+def check_systemhealth():
+ systemhealthstat = snmpget('1.3.6.1.4.1.2.3.51.2.2.7.1.0')
+ summary = getTable('1.3.6.1.4.1.2.3.51.2.2.7.2.1')
+ index, severity, description, date = (1, 2, 3, 4)
+ # Sometimes chassis delivers warning when absolutely nothing is going on.
+    # Let's work around that.
+ workaround = [{1: '1', 2: 'Good', 3: 'No critical or warning events', 4: 'No timestamp'}]
+ # Check overall health
+ if systemhealthstat == '255':
+ nagios_status(ok)
+ add_summary("Bladecenter health: OK. ")
+ elif summary.values() == workaround:
+ add_summary("Non-Critical Error (bug in firmware): '%s' " %
+ workaround[0][description])
+ nagios_status(ok)
+ return
+ elif systemhealthstat == "2":
+ nagios_status(warning)
+ add_summary("Non-Critical Error. ")
+ elif systemhealthstat == "4":
+ nagios_status(critical)
+ add_summary("System-Level Error. ")
+ elif systemhealthstat == "0":
+ nagios_status(critical)
+ add_summary("Critical. ")
+ else:
+ nagios_status(unknown)
+ add_summary(
+            "Bladecenter health unknown (oid 1.3.6.1.4.1.2.3.51.2.2.7.1.0 returns %s). " %
+ systemhealthstat)
+ for row in summary.values():
+ if row[severity] == 'Good':
+ nagios_status(ok)
+ elif row[severity] == 'Warning':
+ nagios_status(warning)
+ elif row[severity] == 'System Level':
+ nagios_status(warning)
+ else:
+ nagios_status(critical)
+ add_summary("%s. " % (row[description]))
+ add_long("* %s. " % (row[description]))
+
+
+def check_temperature():
+ # set some sensible defaults
+ if opts.warning_threshold is None:
+ opts.warning_threshold = 28
+ if opts.critical_threshold is None:
+ opts.critical_threshold = 35
+ str_temp = snmpget('1.3.6.1.4.1.2.3.51.2.2.1.5.1.0')
+ float_temp, measurement = str_temp.split(None, 1)
+ float_temp = float(float_temp)
+ if opts.critical_threshold is not None and float_temp > opts.critical_threshold:
+ nagios_status(critical)
+ add_summary(
+ "ambient temperature (%s) is over critical thresholds (%s). " %
+ (str_temp, opts.critical_threshold))
+ elif opts.warning_threshold is not None and float_temp > opts.warning_threshold:
+ nagios_status(warning)
+ add_summary(
+ "ambient temperature (%s) is over warning thresholds (%s). " %
+ (str_temp, opts.warning_threshold))
+ else:
+ add_summary("Ambient temperature = %s. " % (str_temp))
+ add_perfdata("'ambient_temp'=%s;%s;%s " %
+ (float_temp, opts.warning_threshold, opts.critical_threshold))
+ #add_long( "Ambient Temperature = %s" % (str_temp) )
+ nagios_status(ok)
+
+
+if __name__ == '__main__':
+ try:
+ set_snmp_options()
+ if opts.mode == 'powermodules':
+ check_powermodules()
+ elif opts.mode == 'system-health':
+ check_systemhealth()
+ elif opts.mode == 'temperature':
+ check_temperature()
+ elif opts.mode == 'chassis-status':
+ check_chassis_status()
+ elif opts.mode == 'bladehealth':
+ check_bladehealth()
+ elif opts.mode == 'blowers':
+ check_blowers()
+ elif opts.mode == 'switchmodules':
+ check_switchmodules()
+ else:
+ parser.error("%s is not a valid option for --mode" % opts.mode)
+ except Exception, e:
+ print "Unhandled exception while running script: %s" % e
+ exit(unknown)
+ end()
diff --git a/sensu/files/checks/check_ibm_storwize.py b/sensu/files/checks/check_ibm_storwize.py
new file mode 100644
index 0000000..63cdc55
--- /dev/null
+++ b/sensu/files/checks/check_ibm_storwize.py
@@ -0,0 +1,206 @@
+#!/usr/bin/env python
+
+from pynag.Plugins import PluginHelper, ok, warning, critical, unknown
+from pynag.Utils import runCommand
+from collections import namedtuple
+
+valid_queries = "lsarray lsdrive lsenclosurebattery lsenclosurecanister lsenclosurepsu lsenclosureslot lsenclosure lsmdiskgrp lsmgrp lsrcrelationship lsvdisk"
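+# Example invocation (hypothetical Storwize host; the user defaults to "nagios"):
+#   check_ibm_storwize.py -H storwize01 -Q lsenclosure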
+
+p = PluginHelper()
+p.add_option("-H", "--hostname", '-M', help="Hostname or ip address", dest="hostname")
+p.add_option("-U", "--user", help="Log in as this user to storwize", dest="user", default="nagios")
+p.add_option("-Q", "--query", help="Query to send to storwize (see also -L)", dest="query", default="lsarray")
+p.add_option("-L", "--list-queries", help="List of valid queries", dest="list_queries", action="store_true")
+p.add_option("--test", help="Run this plugin in test mode", dest="test", action="store_true")
+
+p.parse_arguments()
+
+if p.options.list_queries is True:
+ p.parser.error("Valid Queries: %s" % valid_queries)
+if not p.options.hostname:
+    p.parser.error("Required option -H is missing")
+if p.options.query not in valid_queries.split():
+ p.parser.error("%s does not look like a valid query. Use -L for a list of valid queries" % p.options.query)
+
+query = p.options.query
+
+
+# Connect to the remote storwize and run a query
+def run_query():
+ """ Connect to a remote storwize box and run query """
+ command = "ssh %s@%s %s -delim ':'" % (p.options.user, p.options.hostname, p.options.query)
+ if p.options.test:
+ command = "cat %s.txt" % (p.options.query)
+ return_code, stdout, stderr = runCommand(command)
+
+ if return_code != 0:
+ p.status(unknown)
+ p.add_summary("Got error %s when trying to log into remote storwize box" % return_code)
+ p.add_long_output("\ncommand:\n===============\n%s" % command)
+ p.add_long_output("\nStandard output:\n==============\n%s" % (stdout))
+ p.add_long_output("\nStandard stderr:\n==============\n%s" % (stderr))
+ p.exit()
+ if stderr:
+ p.status(unknown)
+ p.add_summary("Error when connecting to storwize: %s" % stderr)
+ p.exit()
+
+ # Parse the output of run query and return a list of "rows"
+ lines = stdout.splitlines()
+ top_line = lines.pop(0)
+ headers = top_line.split(':')
+ Row = namedtuple('Row', ' '.join(headers))
+ rows = []
+ for i in lines:
+ i = i.strip()
+ columns = i.split(':')
+ row = Row(*columns)
+ rows.append(row)
+ return rows
+
+
+def check_lsmdiskgrp():
+ p.add_summary("%s diskgroups found" % (len(rows)))
+ p.add_metric("number of groups", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("group %s is %s." % (row.name, row.status))
+ p.add_long_output("%s: used: %s out of %s" % (row.name, row.used_capacity, row.capacity))
+ # Add a performance metric
+ metric_name = "%s_capacity" % row.name
+ p.add_metric(metric_name, value=row.used_capacity, max=row.capacity)
+
+
+def check_lsdrive():
+ p.add_summary("%s drives found" % (len(rows)))
+ p.add_metric("number of drives", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("drive %s is %s" % (row.id, row.status))
+
+
+def check_lsmgrp():
+ p.add_summary("%s groups found" % (len(rows)))
+ p.add_metric("number of groups", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("group %s is %s" % (row.name, row.status))
+
+
+def check_lsenclosurebattery():
+ p.add_summary("%s batteries found" % (len(rows)))
+ p.add_metric("number of batteries", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("battery %s:%s is %s" % (row.enclosure_id, row.battery_id, row.status))
+
+
+def check_lsenclosurecanister():
+ p.add_summary("%s canisters found" % (len(rows)))
+ p.add_metric("number of canisters", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("canister %s:%s is %s" % (row.enclosure_id, row.canister_id, row.status))
+
+
+def check_lsenclosurepsu():
+ p.add_summary("%s psu found" % (len(rows)))
+ p.add_metric("number of psu", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("psu %s:%s is %s" % (row.enclosure_id, row.PSU_id, row.status))
+
+
+def check_lsenclosure():
+ p.add_summary("%s enclosures found" % (len(rows)))
+ p.add_metric("number of enclosures", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("enclosure %s is %s" % (row.id, row.status))
+
+
+def check_lsenclosureslot():
+ p.add_summary("%s slots found" % (len(rows)))
+ p.add_metric("number of slots", len(rows))
+ for row in rows:
+ if row.port_1_status != 'online':
+ p.status(critical)
+ p.add_summary("port1 on slot %s:%s is %s" % (row.enclosure_id, row.slot_id, row.port_1_status))
+ if row.port_2_status != 'online':
+ p.status(critical)
+ p.add_summary("port2 on slot %s:%s is %s" % (row.enclosure_id, row.slot_id, row.port_2_status))
+
+
+def check_lsrcrelationship():
+ p.add_summary("%s cluster relationships found" % (len(rows)))
+ p.add_metric("number of relationships", len(rows))
+ for row in rows:
+ if row.state != 'consistent_synchronized':
+ p.status(critical)
+ p.add_summary("%s is %s" % (row.consistency_group_name, row.state))
+
+
+def check_lsvdisk():
+ p.add_summary("%s disks found" % (len(rows)))
+ p.add_metric("number of disks", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.status(critical)
+ p.add_summary("disk %s is %s" % (row.name, row.status))
+
+
+def check_lsarray():
+ p.add_summary("%s arrays found" % (len(rows)))
+ p.add_metric("number of arrays", len(rows))
+ for row in rows:
+ if row.status != 'online':
+ p.add_summary("array %s is %s." % (row.mdisk_name, row.status))
+ p.status(critical)
+ if row.raid_status != 'online':
+ p.add_summary("array %s has raid status %s." % (row.mdisk_name, row.raid_status))
+ p.status(critical)
+ # Add some performance metrics
+ metric_name = row.mdisk_name + "_capacity"
+ p.add_metric(metric_name, value=row.capacity)
+
+# Run our given query, and parse the output
+rows = run_query()
+
+if query == 'lsmdiskgrp':
+ check_lsmdiskgrp()
+elif query == 'lsarray':
+ check_lsarray()
+elif query == 'lsdrive':
+ check_lsdrive()
+elif query == 'lsvdisk':
+ check_lsvdisk()
+elif query == 'lsmgrp':
+ check_lsmgrp()
+elif query == 'lsenclosure':
+ check_lsenclosure()
+elif query == 'lsenclosurebattery':
+ check_lsenclosurebattery()
+elif query == 'lsenclosurecanister':
+ check_lsenclosurecanister()
+elif query == 'lsenclosurepsu':
+ check_lsenclosurepsu()
+elif query == 'lsrcrelationship':
+ check_lsrcrelationship()
+elif query == 'lsenclosureslot':
+ check_lsenclosureslot()
+else:
+ p.status(unknown)
+ p.add_summary("unsupported query: %s. See -L for list of valid queries" % query)
+ p.exit()
+
+# Check metrics and exit
+p.check_all_metrics()
+p.exit()
+
diff --git a/sensu/files/checks/check_ibm_svc.pl b/sensu/files/checks/check_ibm_svc.pl
new file mode 100644
index 0000000..aab36a0
--- /dev/null
+++ b/sensu/files/checks/check_ibm_svc.pl
@@ -0,0 +1,667 @@
+#!/usr/bin/perl -w
+# nagios: +epn
+#
+# $Id: check_ibm_svc.pl 352 2013-12-28 19:14:19Z u09422fra $
+#
+# IBM SVC health status plugin for Nagios. Needs wbemcli to query
+# the SVC cluster's CIMOM server.
+#
+
+use strict;
+use Getopt::Std;
+#use XML::LibXML;
+use Time::Local;
+
+#
+# Variables
+#
+my %conf = (
+# wbemcli => '/opt/sblim-wbemcli/bin/wbemcli',
+ wbemcli => '/usr/bin/wbemcli',
+ wbemcli_opt => '-noverify -nl',
+ SNAME => {
+ BackendController => 'BE Ctrl',
+ BackendTargetSCSIProtocolEndpoint => 'BE Target',
+ BackendVolume => 'BE Volume',
+ Cluster => 'Cluster',
+ ConcreteStoragePool => 'Storage Pool',
+ EthernetPort => 'Ethernet Port',
+ FCPort => 'FC Port',
+ FCPortStatistics => 'FC Port Stats',
+ IOGroup => 'I/O Group',
+ MasterConsole => 'Master Console',
+ MirrorExtent => 'VDisk Mirrors',
+ Node => 'Node',
+ QuorumDisk => 'Quorum Disk',
+ StorageVolume => 'Storage Volume' },
+ RC => {
+ OK => '0',
+ WARNING => '1',
+ CRITICAL => '2',
+ UNKNOWN => '3' },
+ STATUS => {
+ 0 => 'OK',
+ 1 => 'WARNING',
+ 2 => 'CRITICAL',
+ 3 => 'UNKNOWN' }
+);
+# A hash map of CIMOM return codes to human readable strings according to the "V6.4.0 CIM Agent
+# Developer's Guide for IBM System Storage SAN Volume Controller" and the "Managed Object Format
+# Documents" in particular.
+# The 'default' hash tree refers to commonly used mappings.
+my %rcmap_default = (
+ OperationalStatus => {
+ 0 => 'Unknown',
+ 1 => 'Other',
+ 2 => 'OK',
+ 3 => 'Degraded',
+ 4 => 'Stressed',
+ 5 => 'Predictive Failure',
+ 6 => 'Error',
+ 7 => 'Non-Recoverable Error',
+ 8 => 'Starting',
+ 9 => 'Stopping',
+ 10 => 'Stopped',
+ 11 => 'In Service',
+ 12 => 'No Contact',
+ 13 => 'Lost Communication',
+ 14 => 'Aborted',
+ 15 => 'Dormant',
+ 16 => 'Supporting Entity in Error',
+ 17 => 'Completed',
+ 18 => 'Power Mode',
+ 32768 => 'Vendor Reserved'
+ }
+);
+my %rcmap = (
+ BackendController => {
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ BackendVolume => {
+ Access => {
+ 0 => 'Unknown',
+ 1 => 'Readable',
+ 2 => 'Writeable',
+ 3 => 'Read/Write Supported',
+ 4 => 'Write Once'
+ },
+ NativeStatus => {
+ 0 => 'Offline',
+ 1 => 'Online',
+ 2 => 'Degraded',
+ 3 => 'Excluded',
+ 4 => 'Degraded Paths',
+ 5 => 'Degraded Port Errors'
+ },
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ ConcreteStoragePool => {
+ NativeStatus => {
+ 0 => 'Offline',
+ 1 => 'Online',
+ 2 => 'Degraded',
+ 3 => 'Excluded',
+ 4 => 'Degraded Paths',
+ 5 => 'Degraded Port Errors'
+ },
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ Cluster => {
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ EthernetPort => {
+ OperationalStatus => {
+ 0 => 'unknown',
+ 1 => 'Other',
+ 2 => 'OK',
+ 6 => 'Error',
+ 10 => 'Stopped',
+ 11 => 'In Service'
+ }
+ },
+ FCPort => {
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ MasterConsole => {
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ MirrorExtent => {
+ Status => {
+ 0 => 'Offline',
+ 1 => 'Online'
+ },
+ Sync => {
+ TRUE => 'In sync',
+ FALSE => 'Out of sync'
+ }
+ },
+ Node => {
+ NativeStatus => {
+ 0 => 'Offline',
+ 1 => 'Online',
+ 2 => 'Pending',
+ 3 => 'Adding',
+ 4 => 'Deleting',
+ 5 => 'Flushing'
+ },
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+ StorageVolume => {
+ CacheState => {
+ 0 => 'Empty',
+ 1 => 'Not empty',
+ 2 => 'Corrupt',
+ 3 => 'Repairing'
+ },
+ NativeStatus => {
+ 0 => 'Offline',
+ 1 => 'Online',
+ 2 => 'Degraded',
+ 3 => 'Formatting'
+ },
+ OperationalStatus => $rcmap_default{'OperationalStatus'}
+ },
+);
+my %output = (
+ perfStr => '',
+ retRC => $conf{'RC'}{'OK'},
+ retStr => '',
+);
+
+#
+# Functions
+#
+# Command line processing
+# Takes: reference to conf hash
+# Returns: nothing
+sub cli {
+ my ($cfg) = @_;
+ my %opts;
+ my $optstring = "C:H:P:c:hp:u:w:";
+ getopts( "$optstring", \%opts) or usage();
+ usage() if ( $opts{h} );
+ if ( exists $opts{H} && $opts{H} ne '' ) {
+ $$cfg{'host'} = $opts{H};
+ if ( exists $opts{P} && $opts{P} ne '' ) {
+ $$cfg{'port'} = $opts{P};
+ } else {
+ $$cfg{'port'} = '5989';
+ }
+ if ( exists $opts{u} && $opts{u} ne '' && exists $opts{p} && $opts{p} ne '' ) {
+ $$cfg{'user'} = $opts{u};
+ $$cfg{'password'} = $opts{p};
+ if ( exists $opts{C} && $opts{C} ne '' ) {
+ if ( $opts{C} eq 'BackendTargetSCSIPE' ) {
+ $$cfg{'check'} = 'BackendTargetSCSIProtocolEndpoint';
+ } else {
+ $$cfg{'check'} = $opts{C};
+ }
+ } else {
+ usage();
+ }
+ } else {
+ usage();
+ }
+ if ( exists $opts{c} && $opts{c} ne '' ) {
+ $$cfg{'critical'} = $opts{c};
+ } else {
+ if ( $$cfg{'check'} eq "IOGroup" ) {
+ usage();
+ }
+ }
+ if ( exists $opts{w} && $opts{w} ne '' ) {
+ $$cfg{'warning'} = $opts{w};
+ } else {
+ if ( $$cfg{'check'} eq "IOGroup" ) {
+ usage();
+ }
+ }
+ } else {
+ usage();
+ }
+}
+
+#
+# Query SVC for check output
+# Takes: reference to conf and output hash
+# Returns: nothing
+sub querySVC {
+ my ($cfg, $out, $rcmap) = @_;
+ my $objectPath = "https://$$cfg{'user'}:$$cfg{'password'}\@$$cfg{'host'}:$$cfg{'port'}/root/ibm:IBMTSSVC_$$cfg{'check'}";
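+    # The assembled command has the form (host, port and credentials are placeholders):
+    #   wbemcli -noverify -nl ei 'https://user:password@host:5989/root/ibm:IBMTSSVC_<Check>'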
+ open( WBEMCLI, "-|", "$$cfg{'wbemcli'} $$cfg{'wbemcli_opt'} ei \'$objectPath\'" ) or die "Can't fork\n";
+
+ my %obj;
+ my $obj_begin;
+ my $prop_name = '';
+ my $prop_value = '';
+ my $inst_count = 0;
+ my $inst_count_half = 0;
+ my $inst_count_nok = 0;
+ my $inst_count_ok = 0;
+ my $path_count = 0;
+ my $path_count_max = 0;
+ my $path_count_half = 0;
+ my $quorum_active = '';
+ while( my $line = <WBEMCLI> ) {
+ if ( ( $line =~ /^$$cfg{'host'}:$$cfg{'port'}\/root\/ibm:IBMTSSVC_$$cfg{'check'}\.(.*)$/ ) == 1 ) {
+ $obj_begin = 1;
+ }
+ elsif ( ( ( $prop_name, $prop_value ) = $line =~ /^-(.*)=(.*)$/ ) == 2 ) {
+ $prop_value =~ s/"//g;
+ $obj{$prop_name} = $prop_value;
+ }
+ elsif ( $line =~ /^\s*$/ && $obj_begin == 1 ) {
+ $obj_begin = 0;
+ $inst_count++;
+ # This should be the end of the paragraph/instance so we should
+ # have gathered all properties at this point
+
+ # Controller on the backend of the clusters FC. BackendControllers control the
+ # BackendVolumes that are needed to form StoragePools in the SAN Volume Controller.
+ # Check for:
+ # OperationalStatus
+ #
+ if ( $$cfg{'check'} eq 'BackendController' ) {
+ if ( $obj{'OperationalStatus'} != 2 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'BackendController'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # A SCSIProtocolEndpoint represents the protocol (command) aspects of a logical
+ # SCSI port, independent of the connection/transport. SCSIProtocolEndpoint is
+ # either directly or indirectly associated with one or more instances of LogicalPort
+ # (via PortImplementsEndpoint) depending on the underlying transport. Indirect
+ # associations aggregate one or more LogicalPorts using intermediate Protocol-
+ # Endpoints (iSCSI, etc). SCSIProtocolEndpoint is also associated to a SCSIProtocol-
+            # Controller, representing the SCSI device. This is the implementation that represents
+ # the SCSIProtocolEndpoint (RemoteServiceAccessPoint) of the Backend Storage.
+ #
+ # Check for:
+ # Status
+ #
+ elsif ( $$cfg{'check'} eq 'BackendTargetSCSIProtocolEndpoint' ) {
+ if ( $obj{'Status'} ne 'Active' ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$cfg{'STATUS'}{$$out{'retRC'}},$obj{'Name'})";
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # A BackendVolume is a SCSI LUN which is exposed on the fabric by a Storage
+ # Controller (typically a RAID array) to the SAN Volume Controller. It can
+            # be a raid array made from local drives or it can be a logical unit from
+            # an external SAN-attached controller that SVC manages.
+ #
+ # In other words, these are the SVC MDisks
+ #
+ # Check for:
+ # Access, NativeStatus, OperationalStatus, Path count
+ #
+ elsif ( $$cfg{'check'} eq 'BackendVolume' ) {
+ if ( $obj{'MaxPathCount'} ne '' ) {
+ $path_count_max = $obj{'MaxPathCount'};
+ $path_count_half = $obj{'MaxPathCount'}/2;
+ }
+ if ( $obj{'PathCount'} ne '' ) {
+ $path_count = $obj{'PathCount'};
+ }
+ if ( $obj{'OperationalStatus'} != 2 || $obj{'NativeStatus'} != 1 || $path_count <= $path_count_half ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'BackendVolume'}{'NativeStatus'}{$obj{'NativeStatus'}},$$rcmap{'BackendVolume'}{'OperationalStatus'}{$obj{'OperationalStatus'}},Paths:$path_count/$path_count_max)";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } elsif ( $obj{'Access'} != 3 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'BackendVolume'}{'Access'}{$obj{'Access'}})";
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ $inst_count_nok++;
+ } elsif ( $path_count < $path_count_max ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}(Path: $path_count/$path_count_max)";
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+            # A group of between one and four Redundancy Groups (and therefore up to
+            # eight Nodes) forms a Cluster.
+ #
+ # Check for:
+ # OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'Cluster' ) {
+ if ( $obj{'OperationalStatus'} != 2 ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'Cluster'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # A pool of Storage that is managed within the scope of a particular System.
+ # StoragePools may consist of component StoragePools or StorageExtents. Storage-
+ # Extents that belong to the StoragePool have a Component relationship to the
+ # StoragePool. StorageExtents/StoragePools that are elements of a pool have
+ # their available space aggregated into the pool. StoragePools and Storage-
+ # Volumes may be created from StoragePools. This is indicated by the Allocated-
+ # FromStoragePool association. StoragePool is scoped to a system by the Hosted-
+ # StoragePool association.
+ # For SVC concrete storage pools, this corresponds to a Managed Disk Group from
+ # which Virtual Disks can be allocated. SVC concrete StoragePools are not pre-
+ # configured and must be created by the storage administrator.
+ #
+ # In other words, these are the SVC MDiskGroups
+ #
+ # Check for:
+ # NativeStatus, OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'ConcreteStoragePool' ) {
+ if ( $obj{'OperationalStatus'} != 2 || $obj{'NativeStatus'} != 1 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'ConcreteStoragePool'}{'NativeStatus'}{$obj{'NativeStatus'}},$$rcmap{'ConcreteStoragePool'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ $$out{'perfStr'} .= " cap_$obj{'ElementName'}=$obj{'UsedCapacity'};;;;$obj{'TotalManagedSpace'}";
+ $$out{'perfStr'} .= " md_$obj{'ElementName'}=$obj{'NumberOfBackendVolumes'};;;;";
+ $$out{'perfStr'} .= " vd_$obj{'ElementName'}=$obj{'NumberOfStorageVolumes'};;;;";
+ }
+ #
+ # Ethernet port of a SVC node.
+ #
+ # Check for:
+ # OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'EthernetPort' ) {
+ if ( $obj{'OperationalStatus'} != 2 && $obj{'OperationalStatus'} != 11 ) {
+ $$out{'retStr'} .= " MAC:$obj{'PermanentAddress'}($$rcmap{'EthernetPort'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ $inst_count_half = $inst_count/2;
+ if ( $inst_count_ok < $inst_count_half ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ }
+ }
+ # Fibre-Channel port of a SVC node. Generally all FC ports of a SVC RedundancyGroup
+ # expose the same devices. Furthermore all FC ports of a SVC cluster share the same
+ # BackendVolumes.
+ #
+ # Check for:
+ # OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'FCPort' ) {
+ if ( $obj{'OperationalStatus'} != 2 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'FCPort'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+                    if ($$rcmap{'FCPort'}{'OperationalStatus'}{$obj{'OperationalStatus'}} eq 'Stopped') {
+ $inst_count_ok++;
+ }
+ else {
+ $inst_count_nok++;
+ }
+ } else {
+ $inst_count_ok++;
+ }
+ $inst_count_half = $inst_count/2;
+
+ if ( $inst_count_ok < $inst_count ) {
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ }
+
+ if ( $inst_count_ok < $inst_count_half ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ }
+ }
+ # FCPortStatistics is the statistics for the FCPort.
+ #
+ # Check for:
+ # -
+ #
+ elsif ( $$cfg{'check'} eq 'FCPortStatistics' ) {
+ my ($node, $port) = $obj{'ElementName'} =~ /^FCPort statistics for port (\d+) on node (\d+)/;
+ my %stats = (
+ BytesTransmitted => 'trans',
+ BytesReceived => 'recv',
+ LinkFailures => 'lf',
+ LossOfSignalCounter => 'losig',
+ LossOfSyncCounter => 'losync',
+ PrimitiveSeqProtocolErrCount => 'pspec',
+ CRCErrors => 'crc',
+ InvalidTransmissionWords => 'inval',
+ BBCreditZeroTime => 'bbzero'
+ );
+
+ $$out{'retStr'} = "OK";
+ foreach my $stat ( sort keys %stats) {
+ $$out{'perfStr'} .= " ".$stats{$stat}."_n".$node."p".$port."=".$obj{$stat}."c;;;;";
+ }
+ }
+ # A group containing two Nodes. An IOGroup defines an interface for a set of
+ # Volumes. All Nodes and Volumes are associated with exactly one IOGroup. The
+ # read and write cache provided by a node is duplicated for redundancy. When
+ # IO is performed to a Volume, the node that processes the IO will duplicate
+ # the data on the Partner node in the IOGroup. This class represents the system
+            # aspect of an IO group whereas IOGroupSet represents the set aspect.
+ #
+ # Check for:
+ # OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'IOGroup' ) {
+ my @mem_elements;
+ $inst_count--;
+ for my $mem ( 'FlashCopy', 'Mirror', 'RAID', 'RemoteCopy' ) {
+ my $mem_free = $mem."FreeMemory";
+ my $mem_total = $mem."TotalMemory";
+ $inst_count++;
+ if ( $obj{$mem_total} == 0 ) {
+ # For inactive memory metrics the value of "*TotalMemory" is zero, skip those.
+ $inst_count--;
+ } elsif ( $obj{$mem_free} <= $$cfg{'critical'} ) {
+ push (@mem_elements, "$mem:CRITICAL");
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } elsif ( $obj{$mem_free} <= $$cfg{'warning'} && $obj{$mem_free} > $$cfg{'critical'} ) {
+ push (@mem_elements, "$mem:WARNING");
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ $inst_count_nok++;
+ } else {
+ push (@mem_elements, "$mem:OK");
+ $inst_count_ok++;
+ }
+ }
+ if ( @mem_elements ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}(".join(',', @mem_elements).")";
+ }
+
+ $$out{'perfStr'} .= " num_hosts_$obj{'ElementName'}=$obj{'NumberOfHosts'};;;;";
+ $$out{'perfStr'} .= " num_nodes_$obj{'ElementName'}=$obj{'NumberOfNodes'};;;;";
+ $$out{'perfStr'} .= " num_vol_$obj{'ElementName'}=$obj{'NumberOfVolumes'};;;;";
+ $$out{'perfStr'} .= " mem_fc_$obj{'ElementName'}=$obj{'FlashCopyFreeMemory'};;;0;$obj{'FlashCopyTotalMemory'}";
+ $$out{'perfStr'} .= " mem_mirr_$obj{'ElementName'}=$obj{'MirrorFreeMemory'};;;0;$obj{'MirrorTotalMemory'}";
+ $$out{'perfStr'} .= " mem_raid_$obj{'ElementName'}=$obj{'RAIDFreeMemory'};;;0;$obj{'RAIDTotalMemory'}";
+ $$out{'perfStr'} .= " mem_rc_$obj{'ElementName'}=$obj{'RemoteCopyFreeMemory'};;;0;$obj{'RemoteCopyTotalMemory'}";
+ }
+ # The SVC management web interface processes.
+ #
+ # Check for:
+ # OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'MasterConsole' ) {
+ if ( $obj{'OperationalStatus'} != 2 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'MasterConsole'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # Represents a single vdisk copy. Each vdisk must have at least one copy and will
+ # have two copies if it is mirrored.
+ #
+ # Check for:
+ # Status, Sync
+ #
+ elsif ( $$cfg{'check'} eq 'MirrorExtent' ) {
+ if ( $obj{'Status'} != 1 || $obj{'Sync'} ne 'TRUE' ) {
+ $$out{'retStr'} .= " VDisk:$obj{'StorageVolumeID'},Copy:$obj{'CopyID'}($$rcmap{'MirrorExtent'}{'Status'}{$obj{'Status'}},$$rcmap{'MirrorExtent'}{'Sync'}{$obj{'Sync'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # A single SAN Volume Controller unit. Nodes work in pairs for redundancy. The
+ # pairs are associated by their IO Group. One or more Node pairs form a Cluster.
+ # When the Cluster is formed, one Node is designated the Config Node. This node
+ # is chosen automatically and it is this Node that binds to the Cluster IP address.
+ # This forms the Configuration Interface to the Cluster.
+ #
+ # Check for:
+ # NativeStatus, OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'Node' ) {
+ if ( $obj{'OperationalStatus'} != 2 || $obj{'NativeStatus'} != 1 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'Node'}{'NativeStatus'}{$obj{'NativeStatus'}},$$rcmap{'Node'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ # Represents a single candidate quorum disk. There is only ONE quorum disk but
+ # the cluster uses three disks as quorum candidate disks. The cluster will select
+ # the actual quorum disk from the pool of quorum candidate disks. When MDisks
+ # are added to the SVC cluster, it checks the MDisk to see if it can be used as
+ # a quorum disk. If the MDisk fulfils the demands, the SVC will assign the three
+ # first MDisks as quorum candidates, and one of them is selected as the active
+ # quorum disk.
+ #
+ # Check for:
+ # Active, Status
+ #
+ elsif ( $$cfg{'check'} eq 'QuorumDisk' ) {
+ if ( $obj{'Status'} ne 'online' ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($obj{'Status'})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ if ( $obj{'Active'} ne 'FALSE' ) {
+ $quorum_active = $obj{'ElementName'};
+ }
+ }
+ # A device presented by the Cluster which can be mapped as a SCSI LUN to host
+ # systems on the SAN. A Volume is formed by allocating a set of Extents from a
+            # Pool. In SVC terms, a VDisk.
+ #
+ # Check for:
+ # CacheState, NativeStatus, OperationalStatus
+ #
+ elsif ( $$cfg{'check'} eq 'StorageVolume' ) {
+ if ( $obj{'OperationalStatus'} != 2 || $obj{'NativeStatus'} != 1 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'StorageVolume'}{'CacheState'}{$obj{'CacheState'}},$$rcmap{'StorageVolume'}{'NativeStatus'}{$obj{'NativeStatus'}},$$rcmap{'StorageVolume'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $inst_count_nok++;
+ } elsif ( $obj{'CacheState'} != 0 && $obj{'CacheState'} != 1 ) {
+ $$out{'retStr'} .= " $obj{'ElementName'}($$rcmap{'StorageVolume'}{'CacheState'}{$obj{'CacheState'}},$$rcmap{'StorageVolume'}{'NativeStatus'}{$obj{'NativeStatus'}},$$rcmap{'StorageVolume'}{'OperationalStatus'}{$obj{'OperationalStatus'}})";
+ if ( $$out{'retRC'} != $$cfg{'RC'}{'CRITICAL'} ) {
+ $$out{'retRC'} = $$cfg{'RC'}{'WARNING'};
+ }
+ $inst_count_nok++;
+ } else {
+ $inst_count_ok++;
+ }
+ }
+ }
+ else { next; }
+ }
+ close( WBEMCLI );
+
+ $$out{'retStr'} =~ s/^ //;
+ $$out{'retStr'} =~ s/,$//;
+ if ( $inst_count_ok != 0 && $inst_count != 0 ) {
+ if ( $$out{'retStr'} ne '' ) {
+ $$out{'retStr'} = " - $$out{'retStr'}";
+ }
+ $$out{'retStr'} = "Not OK:$inst_count_nok/OK:$inst_count_ok/Total:$inst_count".$$out{'retStr'};
+ }
+
+ # Special case: Check if at least one QuorumDisk was in the "active='TRUE'" state.
+ if ( $$cfg{'check'} eq 'QuorumDisk' ) {
+ if ( $quorum_active ne '' ) {
+ $$out{'retStr'} .= " - Active quorum on \"$quorum_active\"";
+ } else {
+ $$out{'retRC'} = $$cfg{'RC'}{'CRITICAL'};
+ $$out{'retStr'} .= " - No active quorum disk found";
+ }
+ }
+
+ $$out{'perfStr'} =~ s/^ //;
+ $$out{'perfStr'} =~ s/,$//;
+ if ( $$out{'perfStr'} ne '' ) {
+ $$out{'perfStr'} = "|".$$out{'perfStr'};
+ } else {
+ $$out{'perfStr'} = "|nok=$inst_count_nok;;;; ok=$inst_count_ok;;;; total=$inst_count;;;;";
+ }
+}
+
+#
+# Print usage
+# Takes: nothing
+# Returns: nothing
+sub usage {
+ (my $Me = $0) =~ s!.*/!!;
+ print STDOUT << "EOF";
+
+IBM SVC health status plugin for Nagios. Needs wbemcli to query
+the SVC cluster's CIMOM server.
+
+Usage: $Me [-h] -H host [-P port] -u user -p password -C check [-c crit] [-w warn]
+
+Flags:
+ -C check Check to run. Currently available checks:
+ BackendController, BackendTargetSCSIPE, BackendVolume, Cluster,
+ ConcreteStoragePool, EthernetPort, FCPort, FCPortStatistics,
+ IOGroup*, MasterConsole, MirrorExtent, Node, QuorumDisk, StorageVolume
+  -H host       Hostname or IP of the SVC cluster.
+ -P port CIMOM port on the SVC cluster.
+ -c crit Critical threshold (only for checks with '*')
+ -h Print this help message.
+ -p Password for CIMOM access on the SVC cluster.
+ -u User with CIMOM access on the SVC cluster.
+ -w warn Warning threshold (only for checks with '*')
+
+EOF
+ exit;
+}
+
+#
+# Main
+#
+# Get command-line options
+cli(\%conf);
+
+# Query SVC for check output
+querySVC(\%conf, \%output, \%rcmap);
+
+print uc($conf{'SNAME'}{$conf{'check'}})." $conf{'STATUS'}{$output{'retRC'}} - $output{'retStr'}$output{'perfStr'}\n";
+exit $output{'retRC'};
+
+#
+## EOF
diff --git a/sensu/files/checks/check_keystone_api b/sensu/files/checks/check_keystone_api
new file mode 100644
index 0000000..cb5e7bb
--- /dev/null
+++ b/sensu/files/checks/check_keystone_api
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Keystone API monitoring script for Sensu
+#
+# Copyright © 2013 eNovance <licensing@enovance.com>
+#
+# Author: Emilien Macchi <emilien.macchi@enovance.com>
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+# Requirement: curl
+#
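+# Example invocation (hypothetical endpoint; the script appends :5000/v2.0/tokens/ to the URL):
+#   check_keystone_api -H http://keystone.example.com -U monitor -P secret
+#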
+set -e
+
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+STATE_DEPENDENT=4
+
+usage ()
+{
+ echo "Usage: $0 [OPTIONS]"
+ echo " -h Get help"
+ echo " -H <Auth URL> URL for obtaining an auth token"
+ echo " -U <username> Username to use to get an auth token"
+    echo "  -P <password>    Password to use to get an auth token"
+}
+
+while getopts 'hH:U:T:P:' OPTION
+do
+ case $OPTION in
+ h)
+ usage
+ exit 0
+ ;;
+ H)
+ export OS_AUTH_URL=$OPTARG
+ ;;
+ U)
+ export OS_USERNAME=$OPTARG
+ ;;
+ P)
+ export OS_PASSWORD=$OPTARG
+ ;;
+ *)
+ usage
+ exit 1
+ ;;
+ esac
+done
+
+if ! which curl >/dev/null 2>&1
+then
+ echo "curl is not installed."
+ exit $STATE_UNKNOWN
+fi
+
+START=`date +%s`
+TOKEN=$(curl -d '{"auth":{"passwordCredentials":{"username": "'$OS_USERNAME'", "password": "'$OS_PASSWORD'"}}}' -H "Content-type: application/json" ${OS_AUTH_URL}:5000/v2.0/tokens/ 2>&1 | grep token|awk '{print $8}'|grep -o '".*"' | sed -n 's/.*"\([^"]*\)".*/\1/p')
+END=`date +%s`
+
+TIME=$((END-START))
+
+if [ -z "$TOKEN" ]; then
+ echo "Unable to get a token"
+ exit $STATE_CRITICAL
+else
+ if [ $TIME -gt 10 ]; then
+        echo "Getting a token took more than 10 seconds; that's too long."
+ exit $STATE_WARNING
+ else
+ echo "Got a token, Keystone API is working."
+ exit $STATE_OK
+ fi
+fi
diff --git a/sensu/files/checks/check_kvm.sh b/sensu/files/checks/check_kvm.sh
new file mode 100644
index 0000000..f40fb50
--- /dev/null
+++ b/sensu/files/checks/check_kvm.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Key for libvirt-client: command="sudo /srv/sensu/checks/check_virsh_list.sh $SSH_ORIGINAL_COMMAND",no-port-forwarding,no-x11-forwarding,no-agent-forwarding ssh-rsa ...
+#check_virsh_list.sh:
+#
+#!/bin/bash
+#case $1 in
+# list) virsh list --all;;
+# dumpxml) virsh dumpxml $2;;
+# *) echo invalid option;exit 1;;
+#esac
+#
+#Usage:
+# check_kvm.sh -i instance_id -u user_name -p user_password -t tenant_name -w auth_url
+
+
+while getopts ":i:u:p:t:w:" opt; do
+ case $opt in
+ i)
+ inst_id=${OPTARG};;
+ u)
+ user=${OPTARG};;
+ p)
+ passwd=${OPTARG};;
+ t)
+ tenant_name=${OPTARG};;
+ w)
+ auth_url=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
+#keystone:
+export OS_USERNAME=$user
+export OS_PASSWORD=$passwd
+export OS_TENANT_NAME=$tenant_name
+export OS_AUTH_URL=$auth_url
+
+#Clear getopts variables
+for ((i=1 ; i <= 9 ; i++))
+do
+ shift
+done
+
+#Get hypervisor name
+hypervisor=`nova show $inst_id | grep hypervisor_hostname | awk '{print $4}'`
+
+#Get kvm instance name
+inst_name=`ssh -i /opt/sensu/.ssh/id_rsa libvirt-client@$hypervisor "dumpxml $inst_id" | grep '<name>'`
+#inst_name=`ssh root@$hypervisor "virsh dumpxml $inst_id | grep '<name>'"`
+inst_name=`awk '{gsub("<name>", "");print}' <<< $inst_name`
+inst_name=`awk '{gsub("</name>", "");print}' <<< $inst_name`
+
+#Get state
+state=`ssh -i /opt/sensu/.ssh/id_rsa libvirt-client@$hypervisor "list" | sed '1,2d' | sed '/^$/d'| awk '{print $2" "$3}' | grep $inst_name `
+#state=`ssh root@$hypervisor "virsh list --all | sed '1,2d' | sed '/^$/d'| awk '{print $2" "$3}' | grep $inst_name "`
+state=$(echo $state | awk -F" " '{print $2}')
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+
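+# Map the libvirt domain state reported by virsh to a Nagios-style severity:
+# running/blocked counts as OK, paused as WARNING, anything else as CRITICAL.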
+ case "$state" in
+ running|blocked) OK=$(expr $OK + 1) ;;
+ paused) WARN=$(expr $WARN + 1) ;;
+ shutdown|shut*|crashed) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+ esac
+
+if [ "$WARN" -eq 0 ] && [ "$CRIT" -eq 0 ]; then
+    EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo "instance:$inst_name state:$state OK:$OK WARN:$WARN CRIT:$CRIT"
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_log.sh b/sensu/files/checks/check_log.sh
new file mode 100644
index 0000000..193d724
--- /dev/null
+++ b/sensu/files/checks/check_log.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+
+#check logs
+#usage: ./check_log.sh -p path_to_log -l check_for_last_x_minutes -r regular_expression_you_are_looking_for -c check_last_n_lines
+
+while getopts ":p:l:r:c:" opt; do
+ case $opt in
+ p)
+ LOGPATH=${OPTARG};;
+ l)
+ LAST=${OPTARG};;
+ r)
+ REGEXP=${OPTARG};;
+ c)
+ LINESCOUNT=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
+
+NAME=$(echo "$LOGPATH" | sed "s/.*\///" | sed "s/\..*//")
+
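+# Seconds since the log file was last modified; if that exceeds the -l window
+# (given in minutes, converted to seconds below), assume there is nothing new to scan.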
+LASTCHANGED=$(expr `date +%s` - `stat -c %Y $LOGPATH`)
+
+LAST=$[LAST*60]
+
+if [[ $LASTCHANGED -gt $LAST ]]; then
+ echo No new lines in file.
+ exit 0
+fi
+
+tail -$LINESCOUNT $LOGPATH > LASTLINES_$NAME
+
+EXITCODE=0
+while read line; do
+ if [[ $line == *"$REGEXP"* ]]; then
+ EXITCODE=$[EXITCODE+1]
+ fi
+done < LASTLINES_$NAME
+
+rm -f LASTLINES_$NAME
+
+if [[ $EXITCODE -gt 0 ]]; then
+ echo CRIT:$EXITCODE from last $LINESCOUNT lines.
+ exit 2
+else
+ echo OK:$EXITCODE
+    exit 0
+fi
\ No newline at end of file
diff --git a/sensu/files/checks/check_neutron_agents.py b/sensu/files/checks/check_neutron_agents.py
new file mode 100644
index 0000000..3eadbe1
--- /dev/null
+++ b/sensu/files/checks/check_neutron_agents.py
@@ -0,0 +1,104 @@
+#!/usr/bin/env python
+#
+# Check OpenStack Neutron Agent Status
+# ===
+#
+# Dependencies
+# -----------
+# - python-quantumclient and related libraries
+#
+# Performs API query to determine 'alive' status of all
+# (or filtered list of) Neutron network agents. Also has
+# ability to warn if any agents have been administratively
+# disabled.
+#
+# Copyright 2013 Brian Clark <brian.clark@cloudapt.com>
+#
+# Released under the same terms as Sensu (the MIT license);
+# see LICENSE for details.
+#
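+# Example invocation (hypothetical credentials and endpoint):
+#   check_neutron_agents.py --auth-url http://keystone.example.com:5000/v2.0 \
+#       --username monitor --password secret --tenant admin --warn-disabled
+#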
+
+import sys
+import argparse
+import logging
+from quantumclient.quantum import client
+
+STATE_OK = 0
+STATE_WARNING = 1
+STATE_CRITICAL = 2
+STATE_UNKNOWN = 3
+
+logging.basicConfig(level=logging.INFO)
+#logging.basicConfig(level=logging.DEBUG)
+
+parser = argparse.ArgumentParser(description='Check OpenStack Neutron agent status')
+parser.add_argument('--auth-url', metavar='URL', type=str,
+ required=True,
+ help='Keystone URL')
+parser.add_argument('--username', metavar='username', type=str,
+ required=True,
+ help='username for authentication')
+parser.add_argument('--password', metavar='password', type=str,
+ required=True,
+ help='password for authentication')
+parser.add_argument('--tenant', metavar='tenant', type=str,
+ required=True,
+ help='tenant name for authentication')
+parser.add_argument('--region_name', metavar='region', type=str,
+ help='Region to select for authentication')
+parser.add_argument('--host', metavar='host', type=str,
+ help='filter by specific host')
+parser.add_argument('--agent-type', metavar='type', type=str,
+ help='filter by specific agent type')
+parser.add_argument('--warn-disabled', action='store_true',
+ default=False,
+ help='warn if any agents administratively disabled')
+args = parser.parse_args()
+
+try:
+ c = client.Client('2.0',
+ username=args.username,
+ tenant_name=args.tenant,
+ password=args.password,
+ auth_url=args.auth_url,
+ region_name=args.region_name)
+ params = {}
+ if args.host: params['host'] = args.host
+ if args.agent_type: params['agent_type'] = args.agent_type
+ agents = c.list_agents(**params)
+except Exception as e:
+ print str(e)
+ sys.exit(STATE_CRITICAL)
+
+agents_down = []
+agents_disabled = []
+messages = []
+exit_state = STATE_OK
+for a in agents['agents']:
+ if a['admin_state_up'] and not a['alive']:
+ agents_down.append(a)
+ elif not a['admin_state_up']:
+ agents_disabled.append(a)
+
+if agents_down:
+ for a in agents_down:
+ messages.append("{} on {} is down".format(a['agent_type'], a['host']))
+ exit_state = STATE_CRITICAL
+
+if args.warn_disabled and agents_disabled:
+ for a in agents_disabled:
+ messages.append("{} on {} is {} and disabled"
+ .format(a['agent_type'],
+ a['host'],
+ 'alive' if a['alive'] else 'down'))
+ if exit_state != STATE_CRITICAL: exit_state = STATE_WARNING
+
+if len(messages) == 1:
+ print "Neutron agent status: {}".format(messages[0])
+else:
+ print "Neutron agent status {} total / {} down / {} disabled".format(len(agents['agents']),
+ len(agents_down),
+ len(agents_disabled))
+
+if len(messages) > 1: print "\n".join(messages)
+exit(exit_state)
diff --git a/sensu/files/checks/check_nova_services.sh b/sensu/files/checks/check_nova_services.sh
new file mode 100644
index 0000000..9038c99
--- /dev/null
+++ b/sensu/files/checks/check_nova_services.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#check nova service-list on controller nodes
+
+while getopts ":u:p:t:h:" opt; do
+ case $opt in
+ u)
+ user=${OPTARG};;
+ p)
+ passwd=${OPTARG};;
+ t)
+ tenant=${OPTARG};;
+ h)
+ host=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
+
+read -ra nova_state_down <<< $(nova --os-username $user --os-password $passwd --os-tenant-name $tenant --os-auth-url $host service-list | head -n -1 | tr -d "|" | awk '/'down'/ {print "Service " $1 " on " $2 " is DOWN" ";"}')
+
+EXITVAL=0
+
+if [[ -n ${nova_state_down[@]} ]]; then
+
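+    # For each service type, strip its name as a prefix from the words of the
+    # "down" list; an exact match becomes an empty word and is dropped by the
+    # re-split, so a changed element count means that service was reported down.
+    # Console, consoleauth and cert only warn; scheduler, conductor and compute
+    # are treated as critical.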
+ read -ra console_test <<< ${nova_state_down[@]#nova-console}
+
+ if [ ${#nova_state_down[@]} -ne ${#console_test[@]} ]; then
+ EXITVAL=1
+ fi
+
+ read -ra consoleauth_test <<< ${nova_state_down[@]#nova-consoleauth}
+
+ if [ ${#nova_state_down[@]} -ne ${#consoleauth_test[@]} ]; then
+ EXITVAL=1
+ fi
+
+ read -ra cert_test <<< ${nova_state_down[@]#nova-cert}
+
+ if [ ${#nova_state_down[@]} -ne ${#cert_test[@]} ]; then
+ EXITVAL=1
+ fi
+
+ read -ra scheduler_test <<< ${nova_state_down[@]#nova-scheduler}
+
+ if [ ${#nova_state_down[@]} -ne ${#scheduler_test[@]} ]; then
+ EXITVAL=2
+ fi
+
+ read -ra conductor_test <<< ${nova_state_down[@]#nova-conductor}
+
+ if [ ${#nova_state_down[@]} -ne ${#conductor_test[@]} ]; then
+ EXITVAL=2
+ fi
+
+ read -ra compute_test <<< ${nova_state_down[@]#nova-compute}
+
+ if [ ${#nova_state_down[@]} -ne ${#compute_test[@]} ]; then
+ EXITVAL=2
+ fi
+
+fi
+
+if [ $EXITVAL != 0 ]; then
+ echo ${nova_state_down[@]}
+fi
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_pacemaker_actions b/sensu/files/checks/check_pacemaker_actions
new file mode 100644
index 0000000..0155308
--- /dev/null
+++ b/sensu/files/checks/check_pacemaker_actions
@@ -0,0 +1,38 @@
+#!/bin/bash
+#
+# title: check_pcmkactions - check for failed pacemaker actions
+# date created: Tue Jan 25 2011
+# last edit: Thu Jun 30 2011
+# author: Sascha Reimann
+# changelog: - crm_mon, awk & grep put into variables.
+
+# nagios returncodes:
+STATE_OK=0
+STATE_WARNING=1
+STATE_CRITICAL=2
+STATE_UNKNOWN=3
+
+crm_mon="/usr/sbin/crm_mon"
+awk="/usr/bin/awk"
+grep="/bin/grep"
+
+# check for failed actions and set $STATUS
+$crm_mon --one-shot | $grep --quiet "Failed"
+STATUS=$?
+
+# generate output:
+if [ ${STATUS} -eq 0 ]
+then
+ DETAILS=$($crm_mon --one-shot | $awk '/Failed/ {f=1}f' | $grep --invert-match Failed)
+ COUNT=$($crm_mon --one-shot | $awk '/Failed/ {f=1}f' | $grep --invert-match --count Failed)
+ echo "CRITICAL: ${COUNT} failed action(s): ${DETAILS}"
+ exit ${STATE_CRITICAL}
+elif [ ${STATUS} -eq 1 ]
+then
+ echo "OK: no failed actions found"
+ exit ${STATE_OK}
+else
+ echo "UNKNOWN: returncode ${STATUS}"
+ exit ${STATE_UNKNOWN}
+fi
+
diff --git a/sensu/files/checks/check_snmp_ibm_imm.sh b/sensu/files/checks/check_snmp_ibm_imm.sh
new file mode 100644
index 0000000..0d48122
--- /dev/null
+++ b/sensu/files/checks/check_snmp_ibm_imm.sh
@@ -0,0 +1,197 @@
+#!/bin/sh
+
+#set -x
+
+# Version 0.0.2 2010-08-24
+# Return 3 for unknown results.
+
+# Version 0.0.1 2010-05-21
+# Ulric Eriksson <ulric.eriksson@dgc.se>
+
+BASEOID=.1.3.6.1.4
+IMMOID=$BASEOID.1.2.3.51.3
+
+tempOID=$IMMOID.1.1
+tempsOID=$tempOID.1.0
+# Temperature sensor count
+tempIndexOID=$tempOID.2.1.1
+# Temperature sensor indexes
+tempNameOID=$tempOID.2.1.2
+# Names of temperature sensors
+tempTempOID=$tempOID.2.1.3
+tempFatalOID=$tempOID.2.1.5
+tempCriticalOID=$tempOID.2.1.6
+tempNoncriticalOID=$tempOID.2.1.7
+
+voltOID=$IMMOID.1.2
+voltsOID=$voltOID.1.0
+voltIndexOID=$voltOID.2.1.1
+voltNameOID=$voltOID.2.1.2
+voltVoltOID=$voltOID.2.1.3
+voltCritHighOID=$voltOID.2.1.6
+voltCritLowOID=$voltOID.2.1.7
+
+fanOID=$IMMOID.1.3
+fansOID=$fanOID.1.0
+fanIndexOID=$fanOID.2.1.1
+fanNameOID=$fanOID.2.1.2
+fanSpeedOID=$fanOID.2.1.3
+fanMaxSpeedOID=$fanOID.2.1.8
+
+healthStatOID=$IMMOID.1.4
+# 255 = Normal, 0 = Critical, 2 = Non-critical Error, 4 = System-level Error
+
+# 'label'=value[UOM];[warn];[crit];[min];[max]
+
+usage()
+{
+ echo "Usage: $0 -H host -C community -T health|temperature|voltage|fans"
+ exit 0
+}
+
+get_health()
+{
+ echo "$HEALTH"|grep "^$1."|head -1|sed -e 's,^.*: ,,'|tr -d '"'
+}
+
+get_temperature()
+{
+ echo "$TEMP"|grep "^$2.*$1 = "|head -1|sed -e 's,^.*: ,,'|tr -d '"'
+}
+
+get_voltage()
+{
+ echo "$VOLT"|grep "^$2.*$1 = "|head -1|sed -e 's,^.*: ,,'|tr -d '"'
+}
+
+get_fan()
+{
+ echo "$FANS"|grep "^$2.*$1 = "|head -1|sed -e 's,^.*: ,,'|tr -d '"'
+}
+
+if test "$1" = -h; then
+ usage
+fi
+
+while getopts "H:C:T:" o; do
+ case "$o" in
+ H )
+ HOST="$OPTARG"
+ ;;
+ C )
+ COMMUNITY="$OPTARG"
+ ;;
+ T )
+ TEST="$OPTARG"
+ ;;
+ * )
+ usage
+ ;;
+ esac
+done
+
+RESULT=
+STATUS=0 # OK
+
+case "$TEST" in
+health )
+ HEALTH=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $healthStatOID`
+ healthStat=`get_health $healthStatOID`
+ case "$healthStat" in
+ 0 )
+ RESULT="Health status: Critical"
+ STATUS=2 # Critical
+ ;;
+ 2 )
+ RESULT="Health status: Non-critical error"
+ STATUS=1
+ ;;
+ 4 )
+ RESULT="Health status: System level error"
+ STATUS=2
+ ;;
+ 255 )
+ RESULT="Health status: Normal"
+ ;;
+ * )
+ RESULT="Health status: Unknown"
+ STATUS=3
+ ;;
+ esac
+ ;;
+temperature )
+ TEMP=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $tempOID`
+ # Figure out which temperature indexes we have
+ temps=`echo "$TEMP"|
+ grep -F "$tempIndexOID."|
+ sed -e 's,^.*: ,,'`
+ if test -z "$temps"; then
+ RESULT="No temperatures"
+ STATUS=3
+ fi
+ for i in $temps; do
+ tempName=`get_temperature $i $tempNameOID`
+ tempTemp=`get_temperature $i $tempTempOID`
+ tempFatal=`get_temperature $i $tempFatalOID`
+ tempCritical=`get_temperature $i $tempCriticalOID`
+ tempNoncritical=`get_temperature $i $tempNoncriticalOID`
+ RESULT="$RESULT$tempName = $tempTemp
+"
+ if test "$tempTemp" -ge "$tempCritical"; then
+ STATUS=2
+ elif test "$tempTemp" -ge "$tempNoncritical"; then
+ STATUS=1
+ fi
+ PERFDATA="${PERFDATA}Temperature$i=$tempTemp;;;; "
+ done
+ ;;
+voltage )
+ VOLT=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $voltOID`
+ volts=`echo "$VOLT"|
+ grep -F "$voltIndexOID."|
+ sed -e 's,^.*: ,,'`
+ if test -z "$volts"; then
+ RESULT="No voltages"
+ STATUS=3
+ fi
+ for i in $volts; do
+ voltName=`get_voltage $i $voltNameOID`
+ voltVolt=`get_voltage $i $voltVoltOID`
+ voltCritHigh=`get_voltage $i $voltCritHighOID`
+ voltCritLow=`get_voltage $i $voltCritLowOID`
+ RESULT="$RESULT$voltName = $voltVolt
+"
+ if test "$voltCritLow" -gt 0 -a "$voltVolt" -le "$voltCritLow"; then
+ #echo "$voltVolt < $voltCritLow"
+ STATUS=2
+ elif test "$voltCritHigh" -gt 0 -a "$voltVolt" -ge "$voltCritHigh"; then
+ #echo "$voltVolt > $voltCritLow"
+ STATUS=2
+ fi
+ PERFDATA="${PERFDATA}Voltage$i=$voltVolt;;;; "
+ done
+ ;;
+fans )
+ FANS=`snmpwalk -v 1 -c $COMMUNITY -On $HOST $fanOID`
+ fans=`echo "$FANS"|
+ grep -F "$fanIndexOID."|
+ sed -e 's,^.*: ,,'`
+ if test -z "$fans"; then
+ RESULT="No fans"
+ STATUS=3
+ fi
+ for i in $fans; do
+ fanName=`get_fan $i $fanNameOID`
+ fanSpeed=`get_fan $i $fanSpeedOID|tr -d 'h '`
+ RESULT="$RESULT$fanName = $fanSpeed
+"
+ PERFDATA="${PERFDATA}Fan$i=$fanSpeed;;;; "
+ done
+ ;;
+* )
+ usage
+ ;;
+esac
+
+echo "$RESULT|$PERFDATA"
+exit $STATUS
diff --git a/sensu/files/checks/check_storwize_v7000.sh b/sensu/files/checks/check_storwize_v7000.sh
new file mode 100644
index 0000000..dce5ffc
--- /dev/null
+++ b/sensu/files/checks/check_storwize_v7000.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+
+#check IBM v7000
+#usage: ./check_storwize_v7000.sh -p password -h host_ip -u user -i ignored_argument
+
+while getopts ":u:p:h:i:" opt; do
+ case $opt in
+ u)
+ USER=${OPTARG};;
+ p)
+ PASSWD=${OPTARG};;
+ h)
+ HOST=${OPTARG};;
+ i)
+ IGNORE=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
+
+command -v sshpass >/dev/null 2>&1 || { echo "Missing program sshpass. Aborting." >&2; exit 1; }
+sshpass -p $PASSWD ssh $USER@$HOST -o StrictHostKeyChecking=no "lseventlog -message no" | grep alert > /etc/sensu/plugins/lsevents_$HOST.log
+
+declare -a LINES
+let i=0
+ERRCODES=()
+CRIT=0
+while IFS=$'\n' read -r line_data; do
+ LINES[i]="${line_data}"
+ VAR="${LINES[i++]}"
+ VAR=" ${VAR:100}|"
+ # Skip events matching the ignore pattern so they are neither counted nor reported
+ if [[ -n $IGNORE && $VAR == *"$IGNORE"* ]]; then
+ continue
+ fi
+ CRIT=$((CRIT+1))
+ ERRCODES+=("$VAR")
+done < /etc/sensu/plugins/lsevents_$HOST.log
+rm -rf /etc/sensu/plugins/lsevents_$HOST.log
+
+if [[ -z ${ERRCODES[@]} ]]; then
+ echo OK
+ exit 0
+else
+ echo CRIT:$CRIT - ${ERRCODES[@]}
+ exit 2
+fi
\ No newline at end of file
diff --git a/sensu/files/checks/check_storwize_v7000_err_finder.sh b/sensu/files/checks/check_storwize_v7000_err_finder.sh
new file mode 100755
index 0000000..56df328
--- /dev/null
+++ b/sensu/files/checks/check_storwize_v7000_err_finder.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+
+#check IBM v7000
+#usage: ./check_storwize_v7000_err_finder.sh -p password -h host_ip -u user
+
+while getopts ":u:p:h:" opt; do
+ case $opt in
+ u)
+ USER=${OPTARG};;
+ p)
+ PASSWD=${OPTARG};;
+ h)
+ HOST=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
+
+command -v sshpass >/dev/null 2>&1 || { echo "Missing program sshpass. Aborting." >&2; exit 1; }
+
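+# Run 'svctask finderr' over SSH and extract the error code between square brackets, if any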
+ERRCODE=`sshpass -p "$PASSWD" ssh $USER@$HOST -o StrictHostKeyChecking=no "svctask finderr" | cut -d "[" -f2 | cut -d "]" -f1`
+
+if [[ "$ERRCODE" == "There are no unfixed errors" ]]; then
+ exit 0
+else
+ CONDITION=`cat ./svc_error_database | grep $ERRCODE`
+ echo "Storwize V7000 $HOST $CONDITION"
+ exit 2
+fi
+
+
diff --git a/sensu/files/checks/check_supervisor_proc.py b/sensu/files/checks/check_supervisor_proc.py
new file mode 100644
index 0000000..a13f00e
--- /dev/null
+++ b/sensu/files/checks/check_supervisor_proc.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python
+"""
+nagios plugin to monitor supervisor processes
+---------------------------------------------
+
+usage
+
+::
+
+ check_supervisor_proc.py -p PROCESS_NAME
+
+ check_supervisor_proc.py -p PROCESS_NAME -s unix:///tmp/supervisord_openstack.sock
+
+"""
+from optparse import OptionParser
+import os
+
+#nagios return codes
+UNKNOWN = -1
+OK = 0
+WARNING = 1
+CRITICAL = 2
+
+SUPERV_STAT_CHECK='sudo supervisorctl'
+
+#supervisor states, map state to desired warning level
+supervisor_states = {
+ 'STOPPED': OK,
+ 'RUNNING': OK,
+ 'STOPPING': WARNING,
+ 'STARTING': WARNING,
+ 'EXITED': CRITICAL,
+ 'BACKOFF': CRITICAL,
+ 'FATAL': CRITICAL,
+ 'UNKNOWN': CRITICAL
+ }
+
+def get_status(proc_name, socket):
+ try:
+ if socket != None:
+ status_output = os.popen('%s -s %s status %s' % (SUPERV_STAT_CHECK, socket, proc_name)).read()
+ else:
+ status_output = os.popen('%s status %s' % (SUPERV_STAT_CHECK, proc_name)).read()
+ proc_status = status_output.split()[1]
+ return (status_output, supervisor_states[proc_status])
+ except:
+ print "CRITICAL: Could not get status of %s" % proc_name
+ raise SystemExit, CRITICAL
+
+parser = OptionParser()
+parser.add_option('-p', '--processes-name', dest='proc_name',
+ help="Name of process as it appears in supervisorctl status")
+parser.add_option('-v', '--verbose', dest='verbose', action='store_true',
+ default=False)
+parser.add_option('-q', '--quiet', dest='quiet', action='store_false')
+parser.add_option('-s', '--socket', dest='socket', default=None)
+
+options, args = parser.parse_args()
+
+output = get_status(options.proc_name, options.socket)
+print output[0]
+raise SystemExit, output[1]
diff --git a/sensu/files/checks/check_tenant_vm.sh b/sensu/files/checks/check_tenant_vm.sh
new file mode 100644
index 0000000..05c21c8
--- /dev/null
+++ b/sensu/files/checks/check_tenant_vm.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+#check vm state in tenant
+
+while getopts ":u:p:t:h:" opt; do
+ case $opt in
+ u)
+ user=${OPTARG};;
+ p)
+ passwd=${OPTARG};;
+ t)
+ tenant=${OPTARG};;
+ h)
+ host=${OPTARG};;
+ \?)
+ echo "Invalid option";exit 1;;
+ : ) echo "Option -"$OPTARG" requires an argument." >&2
+ exit 1;;
+ esac
+done
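+# List the tenant's instances and report any that are in ERROR state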
+read -ra nova_vm_state <<< $(nova --os-username $user --os-password $passwd --os-tenant-name $tenant --os-auth-url $host list | head -n -1 | tr -d "|" | awk '/'ERROR'/ {print "VM: " $2 " ID: " $1 " is in ERROR state" ";"}')
+
+EXITVAL=0
+
+if [[ -n ${nova_vm_state[@]} ]]; then
+ EXITVAL=1
+fi
+
+if [ $EXITVAL != 0 ]; then
+ echo "Tenant: $tenant ${nova_vm_state[@]}"
+fi
+exit $EXITVAL
diff --git a/sensu/files/checks/check_virsh_list.sh b/sensu/files/checks/check_virsh_list.sh
new file mode 100644
index 0000000..e7b559c
--- /dev/null
+++ b/sensu/files/checks/check_virsh_list.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# Vlata Mikes - remote script for KVM checks
+
+case $1 in
+ list) virsh list --all;;
+ dumpxml) virsh dumpxml $2;;
+ *) echo invalid option;exit 1;;
+esac
\ No newline at end of file
diff --git a/sensu/files/checks/check_vrouter.sh b/sensu/files/checks/check_vrouter.sh
new file mode 100644
index 0000000..483a389
--- /dev/null
+++ b/sensu/files/checks/check_vrouter.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+#
+# check vrouter status on compute nodes
+
+service=vrouter
+
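+# Parse the 'supervisorctl status' output into words; the agent state is detected by
+# checking whether stripping a keyword (agent name, STARTING, STOPPED, FATAL, EXITED)
+# from every word shrinks the array, i.e. whether that word appears in the output.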
+read -ra contrail_status <<< $(sudo supervisorctl -s unix:///tmp/supervisord_$service.sock status)
+
+check_ok=0
+state=RUNNING
+
+read -ra contrail_test <<< ${contrail_status[@]#contrail-$service-agent}
+#compare arrays
+if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ check_ok=1
+fi
+
+if [ "$check_ok" -eq 1 ]; then
+
+ read -ra contrail_test <<< ${contrail_status[@]#STARTING}
+
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STARTING
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#STOPPED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=STOPPED
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#FATAL}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+
+ read -ra contrail_test <<< ${contrail_status[@]#EXITED}
+ if [ ${#contrail_status[@]} -ne ${#contrail_test[@]} ]; then
+ state=FATAL
+ fi
+else
+ state=FATAL
+fi
+
+OK=0
+WARN=0
+CRIT=0
+NUM=0
+EXITVAL=0 # default exit code so it is always defined
+
+case "$state" in
+ RUNNING) OK=$(expr $OK + 1) ;;
+ STOPPED|STARTING) WARN=$(expr $WARN + 1) ;;
+ FATAL) CRIT=$(expr $CRIT + 1) ;;
+ *) CRIT=$(expr $CRIT + 1) ;;
+esac
+
+if [ "$NUM" -eq "$OK" ]; then
+ EXITVAL=0 #Status 0 = OK (green)
+fi
+
+if [ "$WARN" -gt 0 ]; then
+ EXITVAL=1 #Status 1 = WARNING (yellow)
+fi
+
+if [ "$CRIT" -gt 0 ]; then
+ EXITVAL=2 #Status 2 = CRITICAL (red)
+fi
+
+echo "State of contrail-$service OK:$OK WARN:$WARN CRIT:$CRIT - $state"
+
+exit $EXITVAL
diff --git a/sensu/files/checks/check_yum b/sensu/files/checks/check_yum
new file mode 100644
index 0000000..0554f6b
--- /dev/null
+++ b/sensu/files/checks/check_yum
@@ -0,0 +1,506 @@
+#!/usr/bin/python
+# coding=utf-8
+
+"""Nagios plugin to check the YUM package management system for package updates. Can optionally alert on any available updates as well as just security related updates."""
+
+__title__ = "check_yum"
+__version__ = "1.0.0"
+
+#Standard Nagios return codes
+OK = 0
+WARNING = 1
+CRITICAL = 2
+UNKNOWN = 3
+
+import os
+import re
+import sys
+import signal
+OLD_PYTHON = False
+try:
+ from subprocess import Popen, PIPE, STDOUT
+except ImportError:
+ OLD_PYTHON = True
+ import commands
+from optparse import OptionParser
+
+DEFAULT_TIMEOUT = 55
+
+
+def end(status, message, perfdata=""):
+ """Exits the plugin with first arg as the return code and the second arg as the message to output."""
+
+ if perfdata:
+ print "%s | %s" % (message, perfdata)
+ else:
+ print "%s" % message
+
+ if status == OK:
+ sys.exit(OK)
+ elif status == WARNING:
+ sys.exit(WARNING)
+ elif status == CRITICAL:
+ sys.exit(CRITICAL)
+ else:
+ sys.exit(UNKNOWN)
+
+
+YUM = "/usr/bin/yum"
+
+def check_yum_usable():
+ """Checks that the YUM program and path are correct and usable - that the program exists and is executable, otherwise exits with error."""
+
+ if not os.path.exists(YUM):
+ end(UNKNOWN, "%s cannot be found" % YUM)
+ elif not os.path.isfile(YUM):
+ end(UNKNOWN, "%s is not a file" % YUM)
+ elif not os.access(YUM, os.X_OK):
+ end(UNKNOWN, "%s is not executable" % YUM)
+
+
+class YumTester:
+ """Class to hold all portage test functions and state."""
+
+ def __init__(self):
+ """Initialize all object variables."""
+
+ self.all_updates = False
+ self.no_cache_update = False
+ self.no_warn_on_lock = False
+ self.no_warn_on_updates = False
+ self.enable_repo = ""
+ self.disable_repo = ""
+ self.timeout = DEFAULT_TIMEOUT
+ self.verbosity = 0
+ self.warn_on_any_update = False
+
+
+ def validate_all_variables(self):
+ """Validates all object variables to make sure the environment is sane."""
+
+ if self.timeout == None:
+ self.timeout = DEFAULT_TIMEOUT
+ try:
+ self.timeout = int(self.timeout)
+ except ValueError:
+ end(UNKNOWN, "Timeout must be an whole number, representing the timeout in seconds")
+
+ if self.timeout < 1 or self.timeout > 3600:
+ end(UNKNOWN, "Timeout must be a number between 1 and 3600 seconds")
+
+ if self.verbosity == None:
+ self.verbosity = 0
+ try:
+ self.verbosity = int(self.verbosity)
+ if self.verbosity < 0:
+ raise ValueError
+ except ValueError:
+ end(UNKNOWN, "Invalid verbosity type, must be positive numeric integer")
+
+
+ def run(self, cmd):
+ """Runs a system command and returns an array of lines of the output."""
+
+ if cmd == "" or cmd == None:
+ end(UNKNOWN, "Internal python error - no cmd supplied for run function")
+
+ if self.no_cache_update:
+ cmd += " -C"
+
+ if self.enable_repo:
+ for repo in self.enable_repo.split(","):
+ cmd += " --enablerepo=%s" % repo
+ if self.disable_repo:
+ for repo in self.disable_repo.split(","):
+ cmd += " --disablerepo=%s" % repo
+
+ self.vprint(3, "running command: %s" % cmd)
+
+ if OLD_PYTHON:
+ self.vprint(3, "subprocess not available, probably old python version, using shell instead")
+ returncode, stdout = commands.getstatusoutput(cmd)
+ if returncode >= 256:
+ returncode = returncode / 256
+ else:
+ try:
+ process = Popen( cmd.split(), stdin=PIPE, stdout=PIPE, stderr=STDOUT )
+ except OSError, error:
+ error = str(error)
+ if error == "No such file or directory":
+ end(UNKNOWN, "Cannot find utility '%s'" % cmd.split()[0])
+ end(UNKNOWN, "Error trying to run utility '%s' - %s" % (cmd.split()[0], error))
+
+ output = process.communicate()
+ returncode = process.returncode
+ stdout = output[0]
+
+ if stdout == None or stdout == "":
+ end(UNKNOWN, "No output from utility '%s'" % cmd.split()[0])
+
+ self.vprint(3, "Returncode: '%s'\nOutput: '%s'" % (returncode, stdout))
+ output = str(stdout).split("\n")
+ self.check_returncode(returncode, output)
+
+ return output
+
+
+ def check_returncode(self, returncode, output):
+ """Takes the returncode and output (as an array of lines) of the YUM program execution and tests for failures, exits with an appropriate message if any are found."""
+
+ if returncode == 0:
+ pass
+ elif returncode == 100:
+ #Updates Available
+ pass
+ elif returncode == 200:
+ if "lock" in output[-2] or "another copy is running" in output[-2]:
+ msg = "Cannot check for updates, another instance of YUM is running"
+ if self.no_warn_on_lock:
+ end(OK, msg)
+ else:
+ end(WARNING, msg)
+ else:
+ output = self.strip_output(output)
+ end(UNKNOWN, "%s" % output)
+ else:
+ if not 'Loading "security" plugin' in output or "Command line error: no such option: --security" in output:
+ end(UNKNOWN, "Security plugin for YUM is required. Try to 'yum install yum-security' and then re-run this plugin. Alternatively, to just alert on any update which does not require the security plugin, try --all-updates")
+ else:
+ output = self.strip_output(output)
+ end(UNKNOWN, "%s" % output)
+
+
+ def strip_output(self, output):
+ """Cleans up the output from the plugin and returns it. Takes and returns an array of the lines of output and returns a single string."""
+
+ self.vprint(3, "stripping output of 'Loading ... plugin' lines")
+ re_loading_plugin = re.compile("^Loading .+ plugin$")
+ output = [re_loading_plugin.sub("", line) for line in output]
+ output = " ".join(output).strip()
+ return output
+
+
+ def set_timeout(self):
+ """Sets an alarm to time out the test."""
+
+ if self.timeout == 1:
+ self.vprint(3, "setting plugin timeout to %s second" % self.timeout)
+ else:
+ self.vprint(3, "setting plugin timeout to %s seconds" % self.timeout)
+
+ signal.signal(signal.SIGALRM, self.sighandler)
+ signal.alarm(self.timeout)
+
+
+ def sighandler(self, discarded, discarded2):
+ """Function to be called by signal.alarm to kill the plugin."""
+
+ #Nop for these variables
+ discarded = discarded2
+ discarded2 = discarded
+
+ end(UNKNOWN, "YUM nagios plugin has self terminated after exceeding the timeout (%s seconds)" % self.timeout)
+
+
+ def get_updates(self):
+ """Checks for updates and returns a tuple containing the number of security updates and the number of total updates."""
+
+ self.vprint(2, "checking for any security updates")
+
+ if self.all_updates:
+ # get_all_updates() returns a single total count with no security breakdown
+ number_other_updates = self.get_all_updates()
+ number_security_updates = 0
+ else:
+ number_security_updates, number_other_updates = self.get_security_updates()
+
+ return number_security_updates, number_other_updates
+
+
+ def get_all_updates(self):
+ """Gets all updates. Returns a single integer of the number of available updates."""
+
+ cmd = "%s check-update" % YUM
+
+ output = self.run(cmd)
+
+ output2 = "\n".join(output).split("\n\n")
+ if self.verbosity >= 4 :
+ for section in output2:
+ print "\nSection:\n%s\n" % section
+ if len(output2) > 2 or not ( "Setting up repositories" in output2[0] or "Loaded plugins: " in output2[0] or re.search('Loading\s+".+"\s+plugin', output2[0]) ):
+ end(WARNING, "YUM output signature does not match current known format. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+ if len(output2) == 1:
+ #There are no updates but we have passed the loading and setting up of repositories
+ number_packages = 0
+ else:
+ number_packages = len([x for x in output2[1].split("\n") if len(x.split()) > 1 ])
+
+ try:
+ number_packages = int(number_packages)
+ if number_packages < 0:
+ raise ValueError
+ except ValueError:
+ end(UNKNOWN, "Error parsing package information, invalid package number, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, then please contact the author for a fix")
+
+ #Extra layer of checks. This is a security plugin so it's preferable to fail on error rather than pass silently leaving you with an insecure system
+ count = 0
+ re_package_format = re.compile("^.+\.(i[3456]86|x86_64|noarch)\s+.+\s+.+$")
+ #This is to work around a YUM truncation issue effectively changing the package output format. Currently only very long kmod lines are seen to have caused this so we stick to what we know for safety and raise an unknown error on anything else for maximum security
+ #re_package_format_truncated = re.compile("^[\w-]+-kmod-\d[\d\.-]+.*\s+.+\s+.+$")
+ for line in output:
+ if re_package_format.match(line):
+ count += 1
+ if count != number_packages:
+ end(UNKNOWN, "Error parsing package information, inconsistent package count, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, then please contact the author for a fix")
+
+ return number_packages
+
+
+ def get_security_updates(self):
+ """Gets all updates, but differentiates between security and normal updates. Returns a tuple of the number of security and normal updates."""
+
+ cmd = "%s --security check-update" % YUM
+
+ output = self.run(cmd)
+
+ re_security_summary_rhel5 = re.compile("Needed \d+ of \d+ packages, for security")
+ re_security_summary_rhel6 = re.compile("\d+ package\(s\) needed for security, out of \d+ available")
+ re_no_security_updates_available_rhel5 = re.compile("No packages needed, for security, \d+ available")
+ re_no_security_updates_available_rhel6 = re.compile("No packages needed for security; \d+ packages available")
+ summary_line_found = False
+ for line in output:
+ if re_no_security_updates_available_rhel5.match(line):
+ summary_line_found = True
+ number_security_updates = 0
+ number_total_updates = line.split()[5]
+ break
+ if re_no_security_updates_available_rhel6.match(line):
+ summary_line_found = True
+ number_security_updates = 0
+ number_total_updates = line.split()[5]
+ break
+ if re_security_summary_rhel5.match(line):
+ summary_line_found = True
+ number_security_updates = line.split()[1]
+ number_total_updates = line.split()[3]
+ break
+ if re_security_summary_rhel6.match(line):
+ summary_line_found = True
+ number_security_updates = line.split()[0]
+ number_total_updates = line.split()[7]
+ break
+
+ if not summary_line_found:
+ end(WARNING, "Cannot find summary line in YUM output. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+
+ try:
+ number_security_updates = int(number_security_updates)
+ number_total_updates = int(number_total_updates)
+ except ValueError:
+ end(WARNING, "Error parsing package information, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, the please contact the author for a fix")
+
+ number_other_updates = number_total_updates - number_security_updates
+
+ if len(output) > number_total_updates + 25:
+ end(WARNING, "YUM output signature is larger than current known format, please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+
+ return number_security_updates, number_other_updates
+
+
+ def test_yum_updates(self):
+ """Starts tests and controls logic flow."""
+
+ check_yum_usable()
+ self.vprint(3, "%s - Version %s\n" % (__title__, __version__))
+
+ self.validate_all_variables()
+ self.set_timeout()
+
+ if self.all_updates:
+ return self.test_all_updates()
+ else:
+ return self.test_security_updates()
+
+
+ def test_all_updates(self):
+ """Tests for all updates, and returns a tuple of the status code and output."""
+
+ status = UNKNOWN
+ message = "code error - please contact author for a fix"
+
+ number_updates = self.get_all_updates()
+ if number_updates == 0:
+ status = OK
+ message = "0 Updates Available"
+ else:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = CRITICAL
+ if number_updates == 1:
+ message = "1 Update Available"
+ else:
+ message = "%s Updates Available" % number_updates
+
+ return status, message
+
+
+ def test_security_updates(self):
+ """Tests for security updates and returns a tuple of the status code and output."""
+
+ status = UNKNOWN
+ message = "code error - please contact author for a fix"
+
+ number_security_updates, number_other_updates = self.get_security_updates()
+ if number_security_updates == 0:
+ status = OK
+ message = "0 Security Updates Available"
+ else:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = CRITICAL
+ if number_security_updates == 1:
+ message = "1 Security Update Available"
+ elif number_security_updates > 1:
+ message = "%s Security Updates Available" % number_security_updates
+
+ if number_other_updates != 0:
+ if self.warn_on_any_update and status != CRITICAL:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = WARNING
+
+ if number_other_updates == 1:
+ message += ". 1 Non-Security Update Available"
+ else:
+ message += ". %s Non-Security Updates Available" % number_other_updates
+
+ return status, message
+
+
+ def vprint(self, threshold, message):
+ """Prints a message if the first arg is numerically greater than the verbosity level."""
+
+ if self.verbosity >= threshold:
+ print "%s" % message
+
+
+def main():
+ """Parses command line options and calls the test function."""
+
+ tester = YumTester()
+ parser = OptionParser()
+
+ parser.add_option("--all-updates",
+ action="store_true",
+ dest="all_updates",
+ help="Does not distinguish between security and non-security updates, but returns critical for any available update. This may be used if the YUM security plugin is absent or you want to maintain every single package at the latest version. You may want to use --warn-on-any-update instead of this option.")
+
+ parser.add_option("--warn-on-any-update",
+ action="store_true",
+ dest="warn_on_any_update",
+ help="Warns if there are any (non-security) package updates available. By default only warns when security related updates are available. If --all-updates is used, then this option is redundant as --all-updates will return a critical result on any available update, whereas using this switch still allows you to differentiate between the severity of updates.")
+
+ parser.add_option("-C", "--cache-only",
+ action="store_true",
+ dest="no_cache_update",
+ help="Run entirely from cache and do not update the cache when running YUM. Useful if you have 'yum makecache' cronned so that the nagios check itself doesn't have to do it, possibly speeding up execution (by 1-2 seconds in tests).")
+
+ parser.add_option("--no-warn-on-lock",
+ action="store_true",
+ dest="no_warn_on_lock",
+ help="Return OK instead of WARNING when YUM is locked and fails to check for updates due to another instance running. This is not recommended from the security standpoint, but may be wanted to reduce the number of alerts that may intermittently pop up when someone is running YUM interactively for package management.")
+
+ parser.add_option("--no-warn-on-updates",
+ action="store_true",
+ dest="no_warn_on_updates",
+ help="Return OK instead of WARNING even when updates are available. This is not recommended from the security standpoint, but may be wanted to disable alerts while the plugin output still shows the number of available updates.")
+
+ parser.add_option("--enablerepo",
+ dest="repository_to_enable",
+ help="Explicitly enables a reposity when calling YUM. Can take a comma separated list of repositories. Note that enabling repositories can lead to unexpected results, for example when protected repositories are enabled.")
+
+ parser.add_option("--disablerepo",
+ dest="repository_to_disable",
+ help="Explicitly disables a repository when calling YUM. Can take a comma separated list of repositories. Note that disabling repositories can lead to unexpected results, for example when protected repositories are disabled.")
+
+ parser.add_option("-t", "--timeout",
+ dest="timeout",
+ help="Sets a timeout in seconds after which the plugin will exit (defaults to %s seconds)." % DEFAULT_TIMEOUT)
+
+ parser.add_option("-v", "--verbose",
+ action="count",
+ dest="verbosity",
+ help="Verbose mode. Can be used multiple times to increase output. Use -vvv for debugging output. By default only one result line is printed as per Nagios standards.")
+
+ parser.add_option("-V", "--version",
+ action="store_true",
+ dest="version",
+ help="Print version number and exit.")
+
+ (options, args) = parser.parse_args()
+
+ if args:
+ parser.print_help()
+ sys.exit(UNKNOWN)
+
+ tester.all_updates = options.all_updates
+ tester.no_cache_update = options.no_cache_update
+ tester.no_warn_on_lock = options.no_warn_on_lock
+ tester.no_warn_on_updates = options.no_warn_on_updates
+ tester.enable_repo = options.repository_to_enable
+ tester.disable_repo = options.repository_to_disable
+ tester.timeout = options.timeout
+ tester.verbosity = options.verbosity
+ tester.warn_on_any_update = options.warn_on_any_update
+
+ if options.version:
+ print "%s - Version %s\n" % (__title__, __version__)
+ sys.exit(OK)
+
+ result, output = tester.test_yum_updates()
+ end(result, output)
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ print "Caught Control-C..."
+ sys.exit(CRITICAL)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#Copyright © ?–?, Hari Sekhon <harisekhon@gmail.com>.
+#Copyright © 2012, Christoph Anton Mitterer <mail@christoph.anton.mitterer.name>.
+#All rights reserved.
+#
+#
+#This program is free software; you can redistribute it and/or
+#modify it under the terms of the GNU General Public License
+#as published by the Free Software Foundation; version 2
+#of the License.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with this program; if not, write to the Free Software
+#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\ No newline at end of file
diff --git a/sensu/files/checks/check_yum.py b/sensu/files/checks/check_yum.py
new file mode 100644
index 0000000..0554f6b
--- /dev/null
+++ b/sensu/files/checks/check_yum.py
@@ -0,0 +1,506 @@
+#!/usr/bin/python
+# coding=utf-8
+
+"""Nagios plugin to check the YUM package management system for package updates. Can optionally alert on any available updates as well as just security related updates."""
+
+__title__ = "check_yum"
+__version__ = "1.0.0"
+
+#Standard Nagios return codes
+OK = 0
+WARNING = 1
+CRITICAL = 2
+UNKNOWN = 3
+
+import os
+import re
+import sys
+import signal
+OLD_PYTHON = False
+try:
+ from subprocess import Popen, PIPE, STDOUT
+except ImportError:
+ OLD_PYTHON = True
+ import commands
+from optparse import OptionParser
+
+DEFAULT_TIMEOUT = 55
+
+
+def end(status, message, perfdata=""):
+ """Exits the plugin with first arg as the return code and the second arg as the message to output."""
+
+ if perfdata:
+ print "%s | %s" % (message, perfdata)
+ else:
+ print "%s" % message
+
+ if status == OK:
+ sys.exit(OK)
+ elif status == WARNING:
+ sys.exit(WARNING)
+ elif status == CRITICAL:
+ sys.exit(CRITICAL)
+ else:
+ sys.exit(UNKNOWN)
+
+
+YUM = "/usr/bin/yum"
+
+def check_yum_usable():
+ """Checks that the YUM program and path are correct and usable - that the program exists and is executable, otherwise exits with error."""
+
+ if not os.path.exists(YUM):
+ end(UNKNOWN, "%s cannot be found" % YUM)
+ elif not os.path.isfile(YUM):
+ end(UNKNOWN, "%s is not a file" % YUM)
+ elif not os.access(YUM, os.X_OK):
+ end(UNKNOWN, "%s is not executable" % YUM)
+
+
+class YumTester:
+ """Class to hold all portage test functions and state."""
+
+ def __init__(self):
+ """Initialize all object variables."""
+
+ self.all_updates = False
+ self.no_cache_update = False
+ self.no_warn_on_lock = False
+ self.no_warn_on_updates = False
+ self.enable_repo = ""
+ self.disable_repo = ""
+ self.timeout = DEFAULT_TIMEOUT
+ self.verbosity = 0
+ self.warn_on_any_update = False
+
+
+ def validate_all_variables(self):
+ """Validates all object variables to make sure the environment is sane."""
+
+ if self.timeout == None:
+ self.timeout = DEFAULT_TIMEOUT
+ try:
+ self.timeout = int(self.timeout)
+ except ValueError:
+ end(UNKNOWN, "Timeout must be an whole number, representing the timeout in seconds")
+
+ if self.timeout < 1 or self.timeout > 3600:
+ end(UNKNOWN, "Timeout must be a number between 1 and 3600 seconds")
+
+ if self.verbosity == None:
+ self.verbosity = 0
+ try:
+ self.verbosity = int(self.verbosity)
+ if self.verbosity < 0:
+ raise ValueError
+ except ValueError:
+ end(UNKNOWN, "Invalid verbosity type, must be positive numeric integer")
+
+
+ def run(self, cmd):
+ """Runs a system command and returns an array of lines of the output."""
+
+ if cmd == "" or cmd == None:
+ end(UNKNOWN, "Internal python error - no cmd supplied for run function")
+
+ if self.no_cache_update:
+ cmd += " -C"
+
+ if self.enable_repo:
+ for repo in self.enable_repo.split(","):
+ cmd += " --enablerepo=%s" % repo
+ if self.disable_repo:
+ for repo in self.disable_repo.split(","):
+ cmd += " --disablerepo=%s" % repo
+
+ self.vprint(3, "running command: %s" % cmd)
+
+ if OLD_PYTHON:
+ self.vprint(3, "subprocess not available, probably old python version, using shell instead")
+ returncode, stdout = commands.getstatusoutput(cmd)
+ if returncode >= 256:
+ returncode = returncode / 256
+ else:
+ try:
+ process = Popen( cmd.split(), stdin=PIPE, stdout=PIPE, stderr=STDOUT )
+ except OSError, error:
+ error = str(error)
+ if error == "No such file or directory":
+ end(UNKNOWN, "Cannot find utility '%s'" % cmd.split()[0])
+ end(UNKNOWN, "Error trying to run utility '%s' - %s" % (cmd.split()[0], error))
+
+ output = process.communicate()
+ returncode = process.returncode
+ stdout = output[0]
+
+ if stdout == None or stdout == "":
+ end(UNKNOWN, "No output from utility '%s'" % cmd.split()[0])
+
+ self.vprint(3, "Returncode: '%s'\nOutput: '%s'" % (returncode, stdout))
+ output = str(stdout).split("\n")
+ self.check_returncode(returncode, output)
+
+ return output
+
+
+ def check_returncode(self, returncode, output):
+ """Takes the returncode and output (as an array of lines) of the YUM program execution and tests for failures, exits with an appropriate message if any are found."""
+
+ if returncode == 0:
+ pass
+ elif returncode == 100:
+ #Updates Available
+ pass
+ elif returncode == 200:
+ if "lock" in output[-2] or "another copy is running" in output[-2]:
+ msg = "Cannot check for updates, another instance of YUM is running"
+ if self.no_warn_on_lock:
+ end(OK, msg)
+ else:
+ end(WARNING, msg)
+ else:
+ output = self.strip_output(output)
+ end(UNKNOWN, "%s" % output)
+ else:
+ if not 'Loading "security" plugin' in output or "Command line error: no such option: --security" in output:
+ end(UNKNOWN, "Security plugin for YUM is required. Try to 'yum install yum-security' and then re-run this plugin. Alternatively, to just alert on any update which does not require the security plugin, try --all-updates")
+ else:
+ output = self.strip_output(output)
+ end(UNKNOWN, "%s" % output)
+
+
+ def strip_output(self, output):
+ """Cleans up the output from the plugin and returns it. Takes and returns an array of the lines of output and returns a single string."""
+
+ self.vprint(3, "stripping output of 'Loading ... plugin' lines")
+ re_loading_plugin = re.compile("^Loading .+ plugin$")
+ output = [re_loading_plugin.sub("", line) for line in output]
+ output = " ".join(output).strip()
+ return output
+
+
+ def set_timeout(self):
+ """Sets an alarm to time out the test."""
+
+ if self.timeout == 1:
+ self.vprint(3, "setting plugin timeout to %s second" % self.timeout)
+ else:
+ self.vprint(3, "setting plugin timeout to %s seconds" % self.timeout)
+
+ signal.signal(signal.SIGALRM, self.sighandler)
+ signal.alarm(self.timeout)
+
+
+ def sighandler(self, discarded, discarded2):
+ """Function to be called by signal.alarm to kill the plugin."""
+
+ #Nop for these variables
+ discarded = discarded2
+ discarded2 = discarded
+
+ end(UNKNOWN, "YUM nagios plugin has self terminated after exceeding the timeout (%s seconds)" % self.timeout)
+
+
+ def get_updates(self):
+ """Checks for updates and returns a tuple containing the number of security updates and the number of total updates."""
+
+ self.vprint(2, "checking for any security updates")
+
+ if self.all_updates:
+ # get_all_updates() returns a single total count with no security breakdown
+ number_other_updates = self.get_all_updates()
+ number_security_updates = 0
+ else:
+ number_security_updates, number_other_updates = self.get_security_updates()
+
+ return number_security_updates, number_other_updates
+
+
+ def get_all_updates(self):
+ """Gets all updates. Returns a single integer of the number of available updates."""
+
+ cmd = "%s check-update" % YUM
+
+ output = self.run(cmd)
+
+ output2 = "\n".join(output).split("\n\n")
+ if self.verbosity >= 4 :
+ for section in output2:
+ print "\nSection:\n%s\n" % section
+ if len(output2) > 2 or not ( "Setting up repositories" in output2[0] or "Loaded plugins: " in output2[0] or re.search('Loading\s+".+"\s+plugin', output2[0]) ):
+ end(WARNING, "YUM output signature does not match current known format. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+ if len(output2) == 1:
+ #There are no updates but we have passed the loading and setting up of repositories
+ number_packages = 0
+ else:
+ number_packages = len([x for x in output2[1].split("\n") if len(x.split()) > 1 ])
+
+ try:
+ number_packages = int(number_packages)
+ if number_packages < 0:
+ raise ValueError
+ except ValueError:
+ end(UNKNOWN, "Error parsing package information, invalid package number, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, then please contact the author for a fix")
+
+ #Extra layer of checks. This is a security plugin so it's preferable to fail on error rather than pass silently leaving you with an insecure system
+ count = 0
+ re_package_format = re.compile("^.+\.(i[3456]86|x86_64|noarch)\s+.+\s+.+$")
+ #This is to work around a YUM truncation issue effectively changing the package output format. Currently only very long kmod lines are seen to have caused this so we stick to what we know for safety and raise an unknown error on anything else for maximum security
+ #re_package_format_truncated = re.compile("^[\w-]+-kmod-\d[\d\.-]+.*\s+.+\s+.+$")
+ for line in output:
+ if re_package_format.match(line):
+ count += 1
+ if count != number_packages:
+ end(UNKNOWN, "Error parsing package information, inconsistent package count, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, then please contact the author for a fix")
+
+ return number_packages
+
+
+ def get_security_updates(self):
+ """Gets all updates, but differentiates between security and normal updates. Returns a tuple of the number of security and normal updates."""
+
+ cmd = "%s --security check-update" % YUM
+
+ output = self.run(cmd)
+
+ re_security_summary_rhel5 = re.compile("Needed \d+ of \d+ packages, for security")
+ re_security_summary_rhel6 = re.compile("\d+ package\(s\) needed for security, out of \d+ available")
+ re_no_security_updates_available_rhel5 = re.compile("No packages needed, for security, \d+ available")
+ re_no_security_updates_available_rhel6 = re.compile("No packages needed for security; \d+ packages available")
+ summary_line_found = False
+ for line in output:
+ if re_no_security_updates_available_rhel5.match(line):
+ summary_line_found = True
+ number_security_updates = 0
+ number_total_updates = line.split()[5]
+ break
+ if re_no_security_updates_available_rhel6.match(line):
+ summary_line_found = True
+ number_security_updates = 0
+ number_total_updates = line.split()[5]
+ break
+ if re_security_summary_rhel5.match(line):
+ summary_line_found = True
+ number_security_updates = line.split()[1]
+ number_total_updates = line.split()[3]
+ break
+ if re_security_summary_rhel6.match(line):
+ summary_line_found = True
+ number_security_updates = line.split()[0]
+ number_total_updates = line.split()[7]
+ break
+
+ if not summary_line_found:
+ end(WARNING, "Cannot find summary line in YUM output. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+
+ try:
+ number_security_updates = int(number_security_updates)
+ number_total_updates = int(number_total_updates)
+ except ValueError:
+ end(WARNING, "Error parsing package information, YUM output may have changed. Please make sure you have upgraded to the latest version of this plugin. If the problem persists, the please contact the author for a fix")
+
+ number_other_updates = number_total_updates - number_security_updates
+
+ if len(output) > number_total_updates + 25:
+ end(WARNING, "YUM output signature is larger than current known format, please make sure you have upgraded to the latest version of this plugin. If the problem persists, please contact the author for a fix")
+
+ return number_security_updates, number_other_updates
+
+
+ def test_yum_updates(self):
+ """Starts tests and controls logic flow."""
+
+ check_yum_usable()
+ self.vprint(3, "%s - Version %s\n" % (__title__, __version__))
+
+ self.validate_all_variables()
+ self.set_timeout()
+
+ if self.all_updates:
+ return self.test_all_updates()
+ else:
+ return self.test_security_updates()
+
+
+ def test_all_updates(self):
+ """Tests for all updates, and returns a tuple of the status code and output."""
+
+ status = UNKNOWN
+ message = "code error - please contact author for a fix"
+
+ number_updates = self.get_all_updates()
+ if number_updates == 0:
+ status = OK
+ message = "0 Updates Available"
+ else:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = CRITICAL
+ if number_updates == 1:
+ message = "1 Update Available"
+ else:
+ message = "%s Updates Available" % number_updates
+
+ return status, message
+
+
+ def test_security_updates(self):
+ """Tests for security updates and returns a tuple of the status code and output."""
+
+ status = UNKNOWN
+ message = "code error - please contact author for a fix"
+
+ number_security_updates, number_other_updates = self.get_security_updates()
+ if number_security_updates == 0:
+ status = OK
+ message = "0 Security Updates Available"
+ else:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = CRITICAL
+ if number_security_updates == 1:
+ message = "1 Security Update Available"
+ elif number_security_updates > 1:
+ message = "%s Security Updates Available" % number_security_updates
+
+ if number_other_updates != 0:
+ if self.warn_on_any_update and status != CRITICAL:
+ if self.no_warn_on_updates:
+ status = OK
+ else:
+ status = WARNING
+
+ if number_other_updates == 1:
+ message += ". 1 Non-Security Update Available"
+ else:
+ message += ". %s Non-Security Updates Available" % number_other_updates
+
+ return status, message
+
+
+ def vprint(self, threshold, message):
+ """Prints a message if the first arg is numerically greater than the verbosity level."""
+
+ if self.verbosity >= threshold:
+ print "%s" % message
+
+
+def main():
+ """Parses command line options and calls the test function."""
+
+ tester = YumTester()
+ parser = OptionParser()
+
+ parser.add_option("--all-updates",
+ action="store_true",
+ dest="all_updates",
+ help="Does not distinguish between security and non-security updates, but returns critical for any available update. This may be used if the YUM security plugin is absent or you want to maintain every single package at the latest version. You may want to use --warn-on-any-update instead of this option.")
+
+ parser.add_option("--warn-on-any-update",
+ action="store_true",
+ dest="warn_on_any_update",
+ help="Warns if there are any (non-security) package updates available. By default only warns when security related updates are available. If --all-updates is used, then this option is redundant as --all-updates will return a critical result on any available update, whereas using this switch still allows you to differentiate between the severity of updates.")
+
+ parser.add_option("-C", "--cache-only",
+ action="store_true",
+ dest="no_cache_update",
+ help="Run entirely from cache and do not update the cache when running YUM. Useful if you have 'yum makecache' cronned so that the nagios check itself doesn't have to do it, possibly speeding up execution (by 1-2 seconds in tests).")
+
+ parser.add_option("--no-warn-on-lock",
+ action="store_true",
+ dest="no_warn_on_lock",
+ help="Return OK instead of WARNING when YUM is locked and fails to check for updates due to another instance running. This is not recommended from the security standpoint, but may be wanted to reduce the number of alerts that may intermittently pop up when someone is running YUM interactively for package management.")
+
+ parser.add_option("--no-warn-on-updates",
+ action="store_true",
+ dest="no_warn_on_updates",
+ help="Return OK instead of WARNING even when updates are available. This is not recommended from the security standpoint, but may be wanted to disable alerts while the plugin output still shows the number of available updates.")
+
+ parser.add_option("--enablerepo",
+ dest="repository_to_enable",
+ help="Explicitly enables a reposity when calling YUM. Can take a comma separated list of repositories. Note that enabling repositories can lead to unexpected results, for example when protected repositories are enabled.")
+
+ parser.add_option("--disablerepo",
+ dest="repository_to_disable",
+ help="Explicitly disables a repository when calling YUM. Can take a comma separated list of repositories. Note that disabling repositories can lead to unexpected results, for example when protected repositories are disabled.")
+
+ parser.add_option("-t", "--timeout",
+ dest="timeout",
+ help="Sets a timeout in seconds after which the plugin will exit (defaults to %s seconds)." % DEFAULT_TIMEOUT)
+
+ parser.add_option("-v", "--verbose",
+ action="count",
+ dest="verbosity",
+ help="Verbose mode. Can be used multiple times to increase output. Use -vvv for debugging output. By default only one result line is printed as per Nagios standards.")
+
+ parser.add_option("-V", "--version",
+ action="store_true",
+ dest="version",
+ help="Print version number and exit.")
+
+ (options, args) = parser.parse_args()
+
+ if args:
+ parser.print_help()
+ sys.exit(UNKNOWN)
+
+ tester.all_updates = options.all_updates
+ tester.no_cache_update = options.no_cache_update
+ tester.no_warn_on_lock = options.no_warn_on_lock
+ tester.no_warn_on_updates = options.no_warn_on_updates
+ tester.enable_repo = options.repository_to_enable
+ tester.disable_repo = options.repository_to_disable
+ tester.timeout = options.timeout
+ tester.verbosity = options.verbosity
+ tester.warn_on_any_update = options.warn_on_any_update
+
+ if options.version:
+ print "%s - Version %s\n" % (__title__, __version__)
+ sys.exit(OK)
+
+ result, output = tester.test_yum_updates()
+ end(result, output)
+
+
+if __name__ == "__main__":
+ try:
+ main()
+ except KeyboardInterrupt:
+ print "Caught Control-C..."
+ sys.exit(CRITICAL)
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+#Copyright © ?–?, Hari Sekhon <harisekhon@gmail.com>.
+#Copyright © 2012, Christoph Anton Mitterer <mail@christoph.anton.mitterer.name>.
+#All rights reserved.
+#
+#
+#This program is free software; you can redistribute it and/or
+#modify it under the terms of the GNU General Public License
+#as published by the Free Software Foundation; version 2
+#of the License.
+#
+#This program is distributed in the hope that it will be useful,
+#but WITHOUT ANY WARRANTY; without even the implied warranty of
+#MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+#GNU General Public License for more details.
+#
+#You should have received a copy of the GNU General Public License
+#along with this program; if not, write to the Free Software
+#Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
\ No newline at end of file
diff --git a/sensu/files/checks/haproxy-metrics.rb b/sensu/files/checks/haproxy-metrics.rb
new file mode 100644
index 0000000..cfb41f6
--- /dev/null
+++ b/sensu/files/checks/haproxy-metrics.rb
@@ -0,0 +1,183 @@
+#! /usr/bin/env ruby
+#
+# <script name>
+#
+# DESCRIPTION:
+# If you are occasionally seeing "nil output" from this check, make sure you have
+# sensu-plugin >= 0.1.7. This will provide a better error message.
+#
+# OUTPUT:
+# metric data, etc
+#
+# PLATFORMS:
+# Linux, Windows, BSD, Solaris, etc
+#
+# DEPENDENCIES:
+# gem: sensu-plugin
+# gem: <?>
+#
+# USAGE:
+# example commands
+#
+# NOTES:
+# #YELLOW
+# backend pool single node stats
+#
+# LICENSE:
+# Pete Shima <me@peteshima.com>, Joe Miller <https://github.com/joemiller>
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-plugin/metric/cli'
+require 'net/http'
+require 'net/https'
+require 'socket'
+require 'csv'
+require 'uri'
+
+class HAProxyMetrics < Sensu::Plugin::Metric::CLI::Graphite
+ option :connection,
+ short: '-c HOSTNAME|SOCKETPATH',
+ long: '--connect HOSTNAME|SOCKETPATH',
+ description: 'HAproxy web stats hostname or path to stats socket',
+ required: true
+
+ option :port,
+ short: '-P PORT',
+ long: '--port PORT',
+ description: 'HAproxy web stats port',
+ default: '80'
+
+ option :path,
+ short: '-q STATUSPATH',
+ long: '--statspath STATUSPATH',
+ description: 'HAproxy web stats path (the / will be prepended to the STATUSPATH e.g stats)',
+ default: '/'
+
+ option :username,
+ short: '-u USERNAME',
+ long: '--user USERNAME',
+ description: 'HAproxy web stats username'
+
+ option :password,
+ short: '-p PASSWORD',
+ long: '--pass PASSWORD',
+ description: 'HAproxy web stats password'
+
+ option :scheme,
+ description: 'Metric naming scheme, text to prepend to metric',
+ short: '-s SCHEME',
+ long: '--scheme SCHEME',
+ default: "#{Socket.gethostname}.haproxy"
+
+ option :use_ssl,
+ description: 'Use SSL to connect to HAproxy web stats',
+ short: '-S',
+ long: '--use-ssl',
+ boolean: true,
+ default: false
+
+ option :backends,
+ description: 'comma-separated list of backends to fetch stats from. Default is all backends',
+ short: '-f BACKEND1[,BACKEND2]',
+ long: '--backends BACKEND1[,BACKEND2]',
+ proc: proc { |l| l.split(',') },
+ default: [] # an empty list means show all backends
+
+ option :server_metrics,
+ description: 'Add metrics for backend servers',
+ boolean: true,
+ long: '--server-metrics',
+ default: false
+
+ option :retries,
+ description: 'Number of times to retry fetching stats from haproxy before giving up.',
+ short: '-r RETRIES',
+ long: '--retries RETRIES',
+ default: 3,
+ proc: proc(&:to_i)
+
+ option :retry_interval,
+ description: 'Interval (seconds) between retries',
+ short: '-i SECONDS',
+ long: '--retry_interval SECONDS',
+ default: 1,
+ proc: proc(&:to_i)
+
+ def acquire_stats
+ uri = URI.parse(config[:connection])
+
+ if uri.is_a?(URI::Generic) && File.socket?(uri.path)
+ socket = UNIXSocket.new(config[:connection])
+ socket.puts('show stat')
+ out = socket.read
+ socket.close
+ else
+ res = Net::HTTP.start(config[:connection], config[:port], use_ssl: config[:use_ssl]) do |http|
+ req = Net::HTTP::Get.new("/#{config[:path]};csv;norefresh")
+ unless config[:username].nil?
+ req.basic_auth config[:username], config[:password]
+ end
+ http.request(req)
+ end
+ out = res.body
+ end
+ return out
+ rescue
+ return nil
+ end
+
+ def run
+ out = nil
+ 1.upto(config[:retries]) do |_i|
+ out = acquire_stats
+ break unless out.to_s.length.zero?
+ sleep(config[:retry_interval])
+ end
+
+ if out.to_s.length.zero?
+ warning "Unable to fetch stats from haproxy after #{config[:retries]} attempts"
+ end
+
+ parsed = CSV.parse(out)
+ parsed.shift
+ parsed.each do |line|
+ if config[:backends].length > 0
+ next unless config[:backends].include? line[0]
+ end
+
+ if line[1] == 'BACKEND'
+ output "#{config[:scheme]}.#{line[0]}.session_current", line[4]
+ output "#{config[:scheme]}.#{line[0]}.session_total", line[7]
+ output "#{config[:scheme]}.#{line[0]}.bytes_in", line[8]
+ output "#{config[:scheme]}.#{line[0]}.bytes_out", line[9]
+ output "#{config[:scheme]}.#{line[0]}.connection_errors", line[13]
+ output "#{config[:scheme]}.#{line[0]}.warning_retries", line[15]
+ output "#{config[:scheme]}.#{line[0]}.warning_redispatched", line[16]
+ output "#{config[:scheme]}.#{line[0]}.response_1xx", line[39]
+ output "#{config[:scheme]}.#{line[0]}.response_2xx", line[40]
+ output "#{config[:scheme]}.#{line[0]}.response_3xx", line[41]
+ output "#{config[:scheme]}.#{line[0]}.response_4xx", line[42]
+ output "#{config[:scheme]}.#{line[0]}.response_5xx", line[43]
+ output "#{config[:scheme]}.#{line[0]}.response_other", line[44]
+ unless line[46].nil?
+ output "#{config[:scheme]}.#{line[0]}.requests_per_second", line[46]
+ end
+ unless line[47].nil?
+ output "#{config[:scheme]}.#{line[0]}.requests_per_second_max", line[47]
+ end
+ output "#{config[:scheme]}.#{line[0]}.queue_time", line[58]
+ output "#{config[:scheme]}.#{line[0]}.connect_time", line[59]
+ output "#{config[:scheme]}.#{line[0]}.response_time", line[60]
+ output "#{config[:scheme]}.#{line[0]}.average_time", line[61]
+ elsif config[:server_metrics]
+ output "#{config[:scheme]}.#{line[0]}.#{line[1]}.session_total", line[7]
+ end
+
+ end
+
+ ok
+ end
+end
diff --git a/sensu/files/checks/mysql-replication-status.rb b/sensu/files/checks/mysql-replication-status.rb
new file mode 100644
index 0000000..2b7b0b0
--- /dev/null
+++ b/sensu/files/checks/mysql-replication-status.rb
@@ -0,0 +1,190 @@
+#! /usr/bin/env ruby
+#
+# <script name>
+#
+# DESCRIPTION:
+# what is this thing supposed to do, monitor? How do alerts or
+# alarms work?
+#
+# OUTPUT:
+# plain text, metric data, etc
+#
+# PLATFORMS:
+# Linux, Windows, BSD, Solaris, etc
+#
+# DEPENDENCIES:
+# gem: sensu-plugin
+# gem: <?>
+#
+# USAGE:
+# example commands
+#
+# NOTES:
+# Does it behave differently on specific platforms, specific use cases, etc
+#
+# LICENSE:
+# <your name> <your email>
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+
+# !/usr/bin/env ruby
+#
+# MySQL Replication Status (modded from disk)
+# ===
+#
+# Copyright 2011 Sonian, Inc <chefs@sonian.net>
+# Updated by Oluwaseun Obajobi 2014 to accept ini argument
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+# USING INI ARGUMENT
+# This was implemented to load mysql credentials without parsing the username/password.
+# The ini file should be readable by the sensu user/group.
+# Ref: http://eric.lubow.org/2009/ruby/parsing-ini-files-with-ruby/
+#
+# EXAMPLE
+# mysql-alive.rb -h db01 --ini '/etc/sensu/my.cnf'
+#
+# MY.CNF INI FORMAT
+# [client]
+# user=sensu
+# password="abcd1234"
+#
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-plugin/check/cli'
+require 'mysql'
+require 'inifile'
+
+class CheckMysqlReplicationStatus < Sensu::Plugin::Check::CLI
+ option :host,
+ short: '-h',
+ long: '--host=VALUE',
+ description: 'Database host'
+
+ option :port,
+ short: '-P',
+ long: '--port=VALUE',
+ description: 'Database port',
+ default: 3306,
+ # #YELLOW
+ proc: lambda { |s| s.to_i } # rubocop:disable Lambda
+
+ option :socket,
+ short: '-s SOCKET',
+ long: '--socket SOCKET',
+ description: 'Socket to use'
+
+ option :user,
+ short: '-u',
+ long: '--username=VALUE',
+ description: 'Database username'
+
+ option :pass,
+ short: '-p',
+ long: '--password=VALUE',
+ description: 'Database password'
+
+ option :ini,
+ short: '-i',
+ long: '--ini VALUE',
+ description: 'My.cnf ini file'
+
+ option :warn,
+ short: '-w',
+ long: '--warning=VALUE',
+ description: 'Warning threshold for replication lag',
+ default: 900,
+ # #YELLOW
+ proc: lambda { |s| s.to_i } # rubocop:disable Lambda
+
+ option :crit,
+ short: '-c',
+ long: '--critical=VALUE',
+ description: 'Critical threshold for replication lag',
+ default: 1800,
+ # #YELLOW
+ proc: lambda { |s| s.to_i } # rubocop:disable Lambda
+
+ option :help,
+ short: '-h',
+ long: '--help',
+ description: 'Check MySQL replication status',
+ on: :tail,
+ boolean: true,
+ show_options: true,
+ exit: 0
+
+ def run
+ if config[:ini]
+ ini = IniFile.load(config[:ini])
+ section = ini['client']
+ db_user = section['user']
+ db_pass = section['password']
+ else
+ db_user = config[:user]
+ db_pass = config[:pass]
+ end
+ db_host = config[:host]
+
+ if [db_host, db_user, db_pass].any?(&:nil?)
+ unknown 'Must specify host, user, password'
+ end
+
+ begin
+ db = Mysql.new(db_host, db_user, db_pass, nil, config[:port], config[:socket])
+ results = db.query 'show slave status'
+
+ unless results.nil?
+ results.each_hash do |row|
+ # #YELLOW
+ # rubocop:disable all
+ warn "couldn't detect replication status" unless
+ %w(Slave_IO_State Slave_IO_Running Slave_SQL_Running Last_IO_Error Last_SQL_Error Seconds_Behind_Master).all? do |key|
+ row.key? key
+ end
+
+ # rubocop: enable all
+ slave_running = %w(Slave_IO_Running Slave_SQL_Running).all? do |key|
+ row[key] =~ /Yes/
+ end
+
+ output = 'Slave not running!'
+ output += ' STATES:'
+ output += " Slave_IO_Running=#{row['Slave_IO_Running']}"
+ output += ", Slave_SQL_Running=#{row['Slave_SQL_Running']}"
+ output += ", LAST ERROR: #{row['Last_SQL_Error']}"
+
+ critical output unless slave_running
+
+ replication_delay = row['Seconds_Behind_Master'].to_i
+
+ message = "replication delayed by #{replication_delay}"
+
+ if replication_delay > config[:warn] &&
+ replication_delay <= config[:crit]
+ warning message
+ elsif replication_delay >= config[:crit]
+ critical message
+ else
+ ok "slave running: #{slave_running}, #{message}"
+ end
+
+ end
+ ok 'show slave status returned no rows. This server is not a slave.'
+ end
+
+ rescue Mysql::Error => e
+ errstr = "Error code: #{e.errno} Error message: #{e.error}"
+ critical "#{errstr} SQLSTATE: #{e.sqlstate}" if e.respond_to?('sqlstate')
+
+ rescue => e
+ critical e
+
+ ensure
+ db.close if db
+ end
+ end
+end
diff --git a/sensu/files/checks/rabbitmq-amqp-alive.rb b/sensu/files/checks/rabbitmq-amqp-alive.rb
new file mode 100644
index 0000000..c8476ca
--- /dev/null
+++ b/sensu/files/checks/rabbitmq-amqp-alive.rb
@@ -0,0 +1,97 @@
+#!/usr/bin/env ruby
+# encoding: UTF-8
+#
+# RabbitMQ check amqp alive plugin
+# ===
+#
+# DESCRIPTION:
+# This plugin checks if the RabbitMQ server is alive by opening an AMQP connection to the given vhost
+#
+# PLATFORMS:
+# Linux, BSD, Solaris
+#
+# DEPENDENCIES:
+# RabbitMQ rabbitmq_management plugin
+# gem: sensu-plugin
+# gem: bunny
+#
+# LICENSE:
+# Copyright 2013 Milos Gajdos
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'sensu-plugin/check/cli'
+require 'bunny'
+
+# main plugin class
+class CheckRabbitAMQPAlive < Sensu::Plugin::Check::CLI
+ option :host,
+ description: 'RabbitMQ host',
+ short: '-w',
+ long: '--host HOST',
+ default: 'localhost'
+
+ option :vhost,
+ description: 'RabbitMQ vhost',
+ short: '-v',
+ long: '--vhost VHOST',
+ default: '%2F'
+
+ option :username,
+ description: 'RabbitMQ username',
+ short: '-u',
+ long: '--username USERNAME',
+ default: 'guest'
+
+ option :password,
+ description: 'RabbitMQ password',
+ short: '-p',
+ long: '--password PASSWORD',
+ default: 'guest'
+
+ option :port,
+ description: 'RabbitMQ AMQP port',
+ short: '-P',
+ long: '--port PORT',
+ default: '5672'
+
+ option :ssl,
+ description: 'Enable SSL for connection to RabbitMQ',
+ long: '--ssl',
+ boolean: true,
+ default: false
+
+ def run
+ res = vhost_alive?
+
+ if res['status'] == 'ok'
+ ok res['message']
+ elsif res['status'] == 'critical'
+ critical res['message']
+ else
+ unknown res['message']
+ end
+ end
+
+ def vhost_alive?
+ host = config[:host]
+ port = config[:port]
+ username = config[:username]
+ password = config[:password]
+ vhost = config[:vhost]
+ ssl = config[:ssl]
+
+ begin
+ conn = Bunny.new("amqp#{ssl ? 's' : ''}://#{username}:#{password}@#{host}:#{port}/#{vhost}")
+ conn.start
+ { 'status' => 'ok', 'message' => 'RabbitMQ server is alive' }
+ rescue Bunny::PossibleAuthenticationFailureError
+ { 'status' => 'critical', 'message' => 'Possible authentication failure' }
+ rescue Bunny::TCPConnectionFailed
+ { 'status' => 'critical', 'message' => 'TCP connection refused' }
+ rescue => e
+ { 'status' => 'unknown', 'message' => e.message }
+ end
+ end
+end
diff --git a/sensu/files/checks/rabbitmq-overview-metrics.rb b/sensu/files/checks/rabbitmq-overview-metrics.rb
new file mode 100644
index 0000000..b190073
--- /dev/null
+++ b/sensu/files/checks/rabbitmq-overview-metrics.rb
@@ -0,0 +1,146 @@
+#!/usr/bin/env ruby
+# encoding: UTF-8
+#
+# RabbitMQ Overview Metrics
+# ===
+#
+# DESCRIPTION:
+# RabbitMQ 'overview' stats are similar to what is shown on the main page
+# of the rabbitmq_management web UI. Example:
+#
+# $ rabbitmq-queue-metrics.rb
+# host.rabbitmq.queue_totals.messages.count 0 1344186404
+# host.rabbitmq.queue_totals.messages.rate 0.0 1344186404
+# host.rabbitmq.queue_totals.messages_unacknowledged.count 0 1344186404
+# host.rabbitmq.queue_totals.messages_unacknowledged.rate 0.0 1344186404
+# host.rabbitmq.queue_totals.messages_ready.count 0 1344186404
+# host.rabbitmq.queue_totals.messages_ready.rate 0.0 1344186404
+# host.rabbitmq.message_stats.publish.count 4605755 1344186404
+# host.rabbitmq.message_stats.publish.rate 17.4130186829638 1344186404
+# host.rabbitmq.message_stats.deliver_no_ack.count 6661111 1344186404
+# host.rabbitmq.message_stats.deliver_no_ack.rate 24.6867565643405 1344186404
+# host.rabbitmq.message_stats.deliver_get.count 6661111 1344186404
+# host.rabbitmq.message_stats.deliver_get.rate 24.6867565643405 1344186404
+#
+# PLATFORMS:
+# Linux, BSD, Solaris
+#
+# DEPENDENCIES:
+# RabbitMQ rabbitmq_management plugin
+# gem: sensu-plugin
+# gem: carrot-top
+#
+# LICENSE:
+# Copyright 2012 Joe Miller - https://github.com/joemiller
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'sensu-plugin/metric/cli'
+require 'socket'
+require 'carrot-top'
+
+# main plugin class
+class RabbitMQMetrics < Sensu::Plugin::Metric::CLI::Graphite
+ option :host,
+ description: 'RabbitMQ management API host',
+ long: '--host HOST',
+ default: 'localhost'
+
+ option :port,
+ description: 'RabbitMQ management API port',
+ long: '--port PORT',
+ proc: proc(&:to_i),
+ default: 15_672
+
+ option :user,
+ description: 'RabbitMQ management API user',
+ long: '--user USER',
+ default: 'guest'
+
+ option :password,
+ description: 'RabbitMQ management API password',
+ long: '--password PASSWORD',
+ default: 'guest'
+
+ option :scheme,
+ description: 'Metric naming scheme, text to prepend to $queue_name.$metric',
+ long: '--scheme SCHEME',
+ default: "#{Socket.gethostname}.rabbitmq"
+
+ option :ssl,
+ description: 'Enable SSL for connection to the API',
+ long: '--ssl',
+ boolean: true,
+ default: false
+
+ def acquire_rabbitmq_info
+ begin
+ rabbitmq_info = CarrotTop.new(
+ host: config[:host],
+ port: config[:port],
+ user: config[:user],
+ password: config[:password],
+ ssl: config[:ssl]
+ )
+ rescue
+ warning 'could not get rabbitmq info'
+ end
+ rabbitmq_info
+ end
+
+ def run
+ timestamp = Time.now.to_i
+
+ rabbitmq = acquire_rabbitmq_info
+ overview = rabbitmq.overview
+
+ # overview['queue_totals']['messages']
+ if overview.key?('queue_totals') && !overview['queue_totals'].empty?
+ output "#{config[:scheme]}.queue_totals.messages.count", overview['queue_totals']['messages'], timestamp
+ output "#{config[:scheme]}.queue_totals.messages.rate", overview['queue_totals']['messages_details']['rate'], timestamp
+
+ # overview['queue_totals']['messages_unacknowledged']
+ output "#{config[:scheme]}.queue_totals.messages_unacknowledged.count", overview['queue_totals']['messages_unacknowledged'], timestamp
+ output "#{config[:scheme]}.queue_totals.messages_unacknowledged.rate", overview['queue_totals']['messages_unacknowledged_details']['rate'], timestamp
+
+ # overview['queue_totals']['messages_ready']
+ output "#{config[:scheme]}.queue_totals.messages_ready.count", overview['queue_totals']['messages_ready'], timestamp
+ output "#{config[:scheme]}.queue_totals.messages_ready.rate", overview['queue_totals']['messages_ready_details']['rate'], timestamp
+ end
+
+ if overview.key?('message_stats') && !overview['message_stats'].empty?
+ # overview['message_stats']['publish']
+ if overview['message_stats'].include?('publish')
+ output "#{config[:scheme]}.message_stats.publish.count", overview['message_stats']['publish'], timestamp
+ end
+ if overview['message_stats'].include?('publish_details') &&
+ overview['message_stats']['publish_details'].include?('rate')
+ output "#{config[:scheme]}.message_stats.publish.rate", overview['message_stats']['publish_details']['rate'], timestamp
+ end
+
+ # overview['message_stats']['deliver_no_ack']
+ if overview['message_stats'].include?('deliver_no_ack')
+ output "#{config[:scheme]}.message_stats.deliver_no_ack.count", overview['message_stats']['deliver_no_ack'], timestamp
+ end
+ if overview['message_stats'].include?('deliver_no_ack_details') &&
+ overview['message_stats']['deliver_no_ack_details'].include?('rate')
+ output "#{config[:scheme]}.message_stats.deliver_no_ack.rate", overview['message_stats']['deliver_no_ack_details']['rate'], timestamp
+ end
+
+ # overview['message_stats']['deliver_get']
+ if overview['message_stats'].include?('deliver_get')
+ output "#{config[:scheme]}.message_stats.deliver_get.count", overview['message_stats']['deliver_get'], timestamp
+ end
+ if overview['message_stats'].include?('deliver_get_details') &&
+ overview['message_stats']['deliver_get_details'].include?('rate')
+ output "#{config[:scheme]}.message_stats.deliver_get.rate", overview['message_stats']['deliver_get_details']['rate'], timestamp
+ end
+ end
+ # overview[object_totals]
+ overview['object_totals'].each do |metric, value|
+ output "#{config[:scheme]}.global_counts.#{metric}", value, timestamp
+ end
+ ok
+ end
+end
diff --git a/sensu/files/checks/rabbitmq-queue-metrics.rb b/sensu/files/checks/rabbitmq-queue-metrics.rb
new file mode 100644
index 0000000..6f097af
--- /dev/null
+++ b/sensu/files/checks/rabbitmq-queue-metrics.rb
@@ -0,0 +1,108 @@
+#!/usr/bin/env ruby
+# encoding: UTF-8
+#
+# RabbitMQ Queue Metrics
+# ===
+#
+# DESCRIPTION:
+# This plugin gathers the following per-queue RabbitMQ metrics:
+# - message count
+# - average egress rate
+# - "drain time" metric, which is the time a queue will take to reach 0 based on the egress rate
+# - consumer count
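+#   (drain time example: 1200 messages at an average egress rate of 40 messages/second
+#   drains in 1200 / 40 = 30 seconds; an empty, idle queue reports 0)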
+#
+# PLATFORMS:
+# Linux, BSD, Solaris
+#
+# DEPENDENCIES:
+# RabbitMQ rabbitmq_management plugin
+# gem: sensu-plugin
+# gem: carrot-top
+#
+# LICENSE:
+# Copyright 2011 Sonian, Inc <chefs@sonian.net>
+# Copyright 2015 Tim Smith <tim@cozy.co> and Cozy Services Ltd.
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'sensu-plugin/metric/cli'
+require 'socket'
+require 'carrot-top'
+
+# main plugin class
+class RabbitMQMetrics < Sensu::Plugin::Metric::CLI::Graphite
+ option :host,
+ description: 'RabbitMQ management API host',
+ long: '--host HOST',
+ default: 'localhost'
+
+ option :port,
+ description: 'RabbitMQ management API port',
+ long: '--port PORT',
+ proc: proc(&:to_i),
+ default: 15_672
+
+ option :user,
+ description: 'RabbitMQ management API user',
+ long: '--user USER',
+ default: 'guest'
+
+ option :password,
+ description: 'RabbitMQ management API password',
+ long: '--password PASSWORD',
+ default: 'guest'
+
+ option :scheme,
+ description: 'Metric naming scheme, text to prepend to $queue_name.$metric',
+ long: '--scheme SCHEME',
+ default: "#{Socket.gethostname}.rabbitmq"
+
+ option :filter,
+ description: 'Regular expression for filtering queues',
+ long: '--filter REGEX'
+
+ option :ssl,
+ description: 'Enable SSL for connection to the API',
+ long: '--ssl',
+ boolean: true,
+ default: false
+
+ def acquire_rabbitmq_queues
+ begin
+ rabbitmq_info = CarrotTop.new(
+ host: config[:host],
+ port: config[:port],
+ user: config[:user],
+ password: config[:password],
+ ssl: config[:ssl]
+ )
+ rescue
+ warning 'could not get rabbitmq queue info'
+ end
+ rabbitmq_info.queues
+ end
+
+ def run
+ timestamp = Time.now.to_i
+ acquire_rabbitmq_queues.each do |queue|
+ if config[:filter]
+ next unless queue['name'].match(config[:filter])
+ end
+
+ # calculate and output time till the queue is drained in drain metrics
+ drain_time = queue['messages'] / queue['backing_queue_status']['avg_egress_rate']
+ drain_time = 0 if drain_time.nan? # 0 rate with 0 messages is 0 time to drain
+ output([config[:scheme], queue['name'], 'drain_time'].join('.'), drain_time.to_i, timestamp)
+
+ %w(messages consumers).each do |metric|
+ output([config[:scheme], queue['name'], metric].join('.'), queue[metric], timestamp)
+ end
+
+ # fetch the average egress rate of the queue
+ rate = format('%.4f', queue['backing_queue_status']['avg_egress_rate'])
+ output([config[:scheme], queue['name'], 'avg_egress_rate'].join('.'), rate, timestamp)
+ end
+ ok
+ end
+end
diff --git a/sensu/files/checks/svc_error_database b/sensu/files/checks/svc_error_database
new file mode 100644
index 0000000..c86b6aa
--- /dev/null
+++ b/sensu/files/checks/svc_error_database
@@ -0,0 +1,335 @@
+An automatic system recovery has started. All configuration commands are blocked. 1001
+The error event log is full. 1002
+"The following causes are possible:
+The node is missing.
+The node is no longer a functional member of the system." 1196
+A node has been missing for 30 minutes. 1195
+The software install process has failed. 2010
+The software upgrade package delivery has failed. 2010
+Unable to connect to the SMTP (email) server 2600
+Unable to send mail through the SMTP (email) server 2601
+The Metro Mirror or Global Mirror feature capacity is not set. 3030
+The FlashCopy® feature capacity is not set. 3031
+The Virtualization feature has exceeded the amount that is licensed. 3032
+The FlashCopy feature has exceeded the amount that is licensed. 3032
+The Metro Mirror or Global Mirror feature has exceeded the amount that is licensed. 3032
+The usage for the thin-provisioned volume is not licensed. 3033
+The value set for the virtualization feature capacity is not valid. 3029
+A physical disk FlashCopy feature license is required. 3035
+A physical disk Metro Mirror and Global Mirror feature license is required. 3036
+A virtualization feature license is required. 3025
+Automatic recovery of offline node failed. 1194
+Unable to send email to any of the configured email servers. 3081
+The external virtualization feature license limit was exceeded. 3032
+Unable to connect to LDAP server. 2251
+The LDAP configuration is not valid. 2250
+The limit for the compression feature license was exceeded. 3032
+The limit for the compression feature license was exceeded. 3032
+Unable to connect to LDAP server that has been automatically configured. 2256
+Invalid LDAP configuration for automatically configured server. 2255
+A licensable feature's trial-timer has reached 0. The feature has now been deactivated. 3082
+A trial of a licensable feature will expire in 5 days. 3083
+A trial of a licensable feature will expire in 10 days. 3084
+A trial of a licensable feature will expire in 15 days. 3085
+A trial of a licensable feature will expire in 45 days. 3086
+Easy Tier feature license limit exceeded. 3032
+FlashCopy feature license limit exceeded. 3032
+External virtualization feature license limit exceeded. 3032
+Remote copy feature license limit exceeded. 3032
+The node ran out of base event sources. As a result, the node has stopped and exited the system. 2030
+The number of device logins has reduced. 1630
+A software error has occurred. 2030
+The block size is invalid, the capacity or LUN identity has changed during the managed disk initialization. 1660
+The managed disk is excluded because of excessive errors. 1310
+The remote port is excluded for a managed disk and node. 1220
+The local port is excluded. 1210
+The login is excluded. 1230
+The local port is excluded. 1211
+A timeout has occurred as a result of excessive processing time. 1340
+An error recovery procedure has occurred. 1370
+A managed disk I/O error has occurred. 1310
+The managed disk error count threshold has exceeded. 1310
+There are too many devices presented to the clustered system. 1200
+There are too many managed disks presented to the cluster (system). 1200
+There are too many LUNs presented to a node. 1200
+There are too many drives presented to a cluster (system). 1200
+A disk I/O medium error has occurred. 1320
+A suitable MDisk or drive for use as a quorum disk was not found. 1330
+The quorum disk is not available. 1335
+A controller configuration is not supported. 1625
+A login transport fault has occurred. 1360
+"A managed disk error recovery procedure (ERP) has occurred. The node or controller reported the following:
+Sense
+Key
+Code
+Qualifier" 1370
+One or more MDisks on a controller are degraded. 1623
+The controller configuration limits failover. 1625
+The controller configuration uses the RDAC mode; this is not supported. 1624
+Persistent unsupported controller configuration. 1695
+The controller system device is only connected to the node through a single initiator port. 1627
+The controller system device is only connected to the node through a single target port. 1627
+The controller system device is only connected to the clustered system nodes through a single target port. 1627
+The controller system device is only connected to the clustered system nodes through half of the expected target ports. 1627
+The controller system device has disconnected all target ports to the clustered system nodes. 1627
+A flash drive failed. A rebuild is required. 1201
+A flash drive is offline as a result of a drive hardware error. 1205
+A flash drive is reporting a predictive failure analysis (PFA). 1215
+A flash drive is reporting too many errors. 1215
+An unrecognized SAS device. 1665
+SAS error counts exceeded the warning thresholds. 1216
+SAS errors exceeded critical thresholds. 1216
+The drive initialization failed because of an unknown block size or a block size that is not valid; an unknown capacity or a capacity that is not valid; or was not able to set the required mode pages. 1661
+A flash drive is offline due to excessive errors. 1311
+A flash drive exceeded the warning temperature threshold. 1217
+A flash drive exceeded the offline temperature threshold. 1218
+A drive exceeded the warning temperature threshold. 1217
+Drive medium error. 1321
+Controller indicates that it does not support descriptor sense for LUNs that are greater than 2 TBs. 1625
+Too many enclosures were presented to a cluster (system). 1200
+The flash drive format was corrupted. 1204
+The block size for the flash drive was incorrect. 1204
+Too many controller target ports were presented to the cluster (system). 1200
+Too many target ports were presented to the clustered system from a single controller. 1200
+The drive is offline as a result of a drive hardware error. 1680
+The drive is reporting predictive failure analysis (PFA) errors. 1680
+The drive is reporting too many errors. 1680
+The drive format is corrupted. 1206
+The block size for the drive was incorrect. 1206
+The drive is offline due to excessive errors. 1680
+The error counts for the SAS drive exceeded the warning thresholds. 1285
+The SAS device was not recognized. 1666
+The SAS enclosure was not recognized. 1666
+The SAS device was not able to be identified. 1666
+There were excessive medium errors on the drive. 1680
+There were excessive overall timeout errors on the drive. 1680
+There were excessive times when the drive stopped. 1680
+A drive failed validation testing. 1680
+There were excessive medium errors on the flash drive. 1215
+There were excessive overall timeout errors on the flash drive. 1204
+Login excluded. 1231
+Drive failed. 1687
+The drive initialization failed because of an unknown block size or a block size that is not valid; an unknown capacity or a capacity that is not valid; or was not able to set the required mode pages. 1680
+A drive is reporting excessive errors. 1685
+There are too many drives presented to a cluster (system). 1200
+Reserved SAS port has devices attached. 1669
+Drive firmware download cancelled because of system changes 3090
+Drive firmware download cancelled because of a drive download problem 3090
+There are too many medium errors on the managed disk. 1610
+A managed disk group is offline. 1620
+There are insufficient virtual extents. 2030
+The managed disk has bad blocks. On an external controller, this can only be a copied medium error. 1840
+The system failed to create a bad block because MDisk already has the maximum number of allowed bad blocks. 1226
+The system failed to create a bad block because the clustered system already has the maximum number of allowed bad blocks. 1225
+The trigger prepare command has failed because of a cache flush failure. 1900
+The mapping is stopped because of the error that is indicated in the data. 1910
+The mapping is stopped because of a clustered system or complete I/O group failure, and the current state of the relationship could not be recovered. 1895
+One or more power supply unit fans have failed. 1124
+A fan is operating outside the expected range. 1126
+There was a fan status communications failure. 1126
+The power supply unit is not installed. 1128
+The power supply unit has indicated an input power failure. 1138
+The power supply unit has indicated an output failure. 1126
+The power supply unit has failed. 1124
+There is no communication with the power supply unit. 1148
+The model type for this enclosure is not valid. 1124
+The power supply unit type is unknown to this product. 1124
+The power supply unit serial number is not valid. 1124
+The canister temperature is at the warning level. 1098
+The canister temperature is at the critical level. 1095
+The SAS cable was excluded because of a missing device. 1260
+A SAS cable was excluded because too many change events were caused. 1260
+A SAS cable was excluded. 1255
+A SAS cable is operating at a reduced speed. 1260
+A SAS cable was excluded because frames were dropped. 1260
+A SAS cable was excluded because the enclosure discovery timed out. 1260
+A SAS cable is not present. 1265
+A canister was removed from the system. 1036
+A canister has been in a degraded state for too long and cannot be recovered. 1034
+A canister is encountering communication problems. 1038
+The canister VPD is not valid. 1032
+The canister has experienced too many resets. 1032
+The drive slot is causing the network to be unstable. 1686
+The drive slot is not running at 6 Gbps. 1686
+The drive slot is dropping frames. 1686
+The drive is visible through only one SAS port. 1686
+The drive power control is not functional. 1008
+The drive slot contains a device that is not responding to queries. 1685
+The managed enclosure is not visible from any node canisters. 1042
+The electronics in the enclosure has failed. 1694
+The electronics in the enclosure has experienced a critical failure. 1008
+The SAS network has too many errors. 1048
+The SAS network has too many errors. 1048
+The firmware update for the enclosure component has failed. 3015
+More than one initiator port was detected on the same strand. 1005
+The order of the enclosures is different on each strand. 1005
+Multiple canisters are connected to a single canister port. 1005
+Canister 1 is connected to canister 2. 1005
+An enclosure is connected to more than one I/O group. 1005
+A managed enclosure is connected to the wrong I/O group. 1005
+An enclosure is connected to more than one chain. 1005
+Too many canisters are connected to a strand. 1005
+The canister is connected to the wrong port. 1005
+A SAS cable is excluded because of single port active drives. 1260
+More than one canister was detected at the same hop count. 1005
+The node location is not able to be detected. 1031
+An enclosure display cannot be updated. 1694
+There is an enclosure battery fault. 1118
+An enclosure battery is missing. 1112
+An enclosure battery is nearing end of life. 1114
+An enclosure battery is at end of life. 1113
+An enclosure battery conditioning is required but not possible. 1131
+There was an enclosure battery communications error. 1116
+A SAS port is active, but no enclosures can be detected. 1005
+There is a connectivity problem between a canister and an enclosure. 1036
+The FRU identity of the enclosure is not valid. 1008
+A new enclosure FRU was detected and needs to be configured. 1041
+The internal device on a node canister was excluded because of too many change events. 1034
+The internal connector on the node canister was excluded as the cause of single ported drives. 1034
+The canister temperature sensor cannot be read. 1034
+The enclosure contains both a node canister and an expansion canister. 1037
+The discovery failed to complete. 1048
+The VPD for the enclosure cannot be read. 1048
+There are too many self-initiated resets on the enclosure. 1048
+The slots are powered off. 1048
+The expansion or control enclosure temperature is at the critical level.
+The relationship is stopped because of a clustered system or complete I/O group failure, and the current state of the mapping could not be recovered. 1700
+A Metro Mirror or Global Mirror relationship or consistency group exists within a clustered system, but its partnership has been deleted. 3080
+A Global Mirror relationship has stopped because of a persistent I/O error. 1920
+A remote copy has stopped because of a persistent I/O error. 1915
+Remote copy has stopped. 1720
+There are too many clustered system partnerships. The number of partnerships has been reduced. 1710
+There are too many clustered system partnerships. The system has been excluded. 1710
+Background copy process for the Remote Copy was blocked. 1960
+The Global Mirror secondary volume is offline. The relationship has pinned hardened write data for this volume. 1925
+The Global Mirror secondary volume is offline due to missing IO group partner node. The relationship has pinned hardened write data for this volume but the node containing the required data is currently offline. 1730
+Global Mirror performance is likely to be impacted. A large amount of pinned data for the offline volumes has reduced the resource available to the global mirror secondary disks. 1925
+The thin-provisioned volume copy is offline because there is insufficient space. 1865
+The thin-provisioned volume copy is offline because the metadata is corrupt. 1862
+The thin-provisioned volume copy is offline because the repair has failed. 1860
+The compressed volume copy is offline because there is insufficient space. 1865
+The compressed volume copy is offline because the metadata is corrupt. 1862
+The compressed volume copy is offline because the repair has failed. 1860
+The compressed volume copy has bad blocks. 1850
+Unable to mirror medium error during volume copy synchronization 1950
+The mirrored volume is offline because the data cannot be synchronized. 1870
+The repair process for the mirrored disk has stopped because there is a difference between the copies. 1600
+Unrecognized node error. 1083
+Detected memory size does not match the expected memory size. 1022
+The WWNN that is stored on the service controller and the WWNN that is stored on the drive do not match. 1192
+Unable to detect any Fibre Channel adapter. 1016
+The system board processor has failed. 1020
+The internal disk file system of the node is damaged. 1187
+Unable to update BIOS settings. 1027
+Unable to update the service processor firmware for the system board. 1020
+The ambient temperature is too high while the system is starting. 1182
+Cannot form clustered system due to lack of resources. 1192
+Duplicate WWNN detected on the SAN. 1192
+A node is unable to communicate with other nodes. 1192
+The node hardware does not meet minimum requirements. 1183
+Too many software failures. 1188
+The node software is damaged. 1187
+The clustered system data cannot be read. 1030
+The clustered system data was not saved when power was lost. 1194
+Unable to read the service controller ID. 1044
+Node held in service state. 1189
+Node canister has the incorrect model for the enclosure. 3020
+Detected hardware is not a valid configuration. 1198
+Detected hardware needs activation. 1199
+The control canister temperature is at the critical level. 528
+The control canister battery is too cold, and at the critical level. 653
+The control canister battery is too hot, and at the critical level. 654
+The ambient temperature of the canister is close to the point where it stops performing I/O and enters a service state. The canister is currently continuing to operate. 768
+The temperature of the CPU in the canister is close to the point where the canister stops performing I/O and enters service state. 769
+There was a PCIe link failure between canisters. 1006
+The PCIe link is degraded between canisters. 1052
+The PCIe link for the CPU is degraded. 1034
+The Fibre Channel ports are not operational. 1060
+Clustered system path failure. 1550
+The SAN is not correctly zoned. As a result, more than 512 ports on the SAN have logged into one Flex System V7000 Storage Node port. 1800
+There are fewer Fibre Channel ports operational than are configured. 1061
+One or more Fibre Channel ports are running at a speed that is lower than the last saved speed. 1065
+A duplicate Fibre Channel frame has been detected, which indicates that there is an issue with the Fibre Channel fabric. Other Fibre Channel errors might also be generated. 1203
+Incorrect canister position. 1192
+No enclosure identity; cannot get status from partner. 1192
+Incorrect enclosure type. 1192
+No enclosure identity and partner does match. 1192
+No enclosure identity and partner does not match. 1192
+No enclosure identity and no state on partner. 1192
+No enclosure identity and no node state. 1192
+Clustered system identity is different on the enclosure and the node. 1023
+Cannot read enclosure identity. 1036
+Detected memory size does not match the expected memory size. 1032
+Enclosure VPD is inconsistent 1008
+The system board service processor has failed. 1034
+The internal disk file system of the node is damaged. 1187
+Unable to update the service processor firmware of the system board. 1034
+Ambient temperature is too high during system startup. 1098
+The internal PCIe switch of the node canister failed. 1034
+Cannot form clustered system due to lack of resources. 1192
+Duplicate WWNN detected on the SAN. 1133
+The node hardware does not meet the minimum requirements. 1034
+The internal drive of the node is failing. 1032
+The node software is inconsistent. 1187
+The clustered system data cannot be read. 1187
+The clustered system data was not saved when power was lost. 1194
+The canister battery is missing. 1153
+The canister battery has failed. 1154
+The canister battery’s temperature is too low. 1156
+The canister battery’s temperature is too high. 1157
+The canister battery communications fault. 1158
+The canister battery has insufficient charge. 1184
+FC adapter missing. 1045
+FC adapter failed. 1046
+FC adapter PCI error. 1046
+FC adapter degraded. 1045
+Fewer Fibre Channel ports operational. 1061
+Fewer Fibre Channel IO ports operational. 1450
+SAS adapter missing. 1045
+SAS adapter failed. 1046
+SAS adapter PCI error. 1046
+SAS adapter degraded. 1046
+Ethernet adapter missing. 1045
+Ethernet adapter failed. 1046
+Ethernet adapter PCI error. 1046
+Ethernet adapter degraded. 1046
+Fewer Ethernet ports operational. 1401
+Bus adapter missing. 1032
+Bus adapter failed. 1032
+Bus adapter PCI error. 1032
+Bus adapter degraded. 1032
+Inter-canister PCIe link failure. 1006
+Ambient temperature warning. 1094
+CPU temperature warning. 1093
+Detected hardware is not a valid configuration. 1198
+Detected hardware needs activation. 1199
+Fabric too large. 1800
+Unable to determine the vital product data (VPD) for an FRU. This is probably because a new FRU has been installed and the software does not recognize that FRU. The clustered system continues to operate; however, you must upgrade the software to fix this warning. 2040
+The node warm started after a software error. 2030
+A connection to a configured remote system has been lost because of a connectivity problem. 1715
+A connection to a configured remote system has been lost because of too many minor errors. 1716
+The internal disk for a node has failed. 1030
+The hard disk is full and cannot capture any more output. 2030
+One of the two power supply units in the node has failed. 1096
+One of the two power supply units in the node cannot be detected. 1096
+One of the two power supply units in the node is without power. 1097
+Degraded PCIe lanes on a high-speed SAS adapter. 1121
+A PCI bus error occurred on a high-speed SAS adapter. 1121
+A high-speed SAS adapter requires a PCI bus reset. 1122
+Vital product data (VPD) is corrupt on high-speed SAS adapter. 1121
+A high-speed SAS controller is missing. 1032
+Degraded PCIe lanes on a high-speed SAS adapter. 1032
+A PCI bus error occurred on a high-speed SAS adapter. 1032
+A high-speed SAS adapter requires a PCI bus reset. 1034
+The limit on the number of clustered system secure shell (SSH) sessions has been reached. 2500
+Unable to access the Network Time Protocol (NTP) network time server. 2700
+An Ethernet port failure has occurred. 1401
+A server error has occurred. 2100
+An array MDisk has deconfigured members and has lost redundancy. 1689
+An array MDisk is corrupt because of lost metadata. 1240
+An array MDisk has taken a spare member that is not an exact match to the array goals. 1692
+An array has members that are located in a different I/O group. 1688
+An array MDisk is no longer protected by an appropriate number of suitable spares. 1690
+An array MDisk is offline. The metadata for the inflight writes is on a missing node. 1243
+An array MDisk is offline. Metadata on the missing node contains needed state information. 1243
+
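The file above is a plain lookup table: each record is an SVC/Storwize event description followed by its numeric error code, and multi-line descriptions are wrapped in double quotes. A minimal, hypothetical Python sketch (not part of the formula; the function name and default path are assumptions) for loading the table into a description-to-code map:

    import re

    def load_svc_error_database(path='/etc/sensu/checks/svc_error_database'):
        """Parse the error table into a {description: numeric_code} dict."""
        with open(path) as handle:
            text = handle.read()
        # A record is either a quoted (possibly multi-line) description or a
        # single unquoted line, followed by a trailing 3-4 digit error code.
        # Entries without a trailing code are skipped, and duplicated
        # descriptions simply overwrite each other.
        record = re.compile(
            r'(?:"(?P<quoted>[^"]+)"|(?P<plain>[^"\n][^\n]*?))\s+(?P<code>\d{3,4})\s*$',
            re.MULTILINE)
        mapping = {}
        for match in record.finditer(text):
            description = (match.group('quoted') or match.group('plain')).strip()
            mapping[description] = int(match.group('code'))
        return mapping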
diff --git a/sensu/files/client.json b/sensu/files/client.json
new file mode 100644
index 0000000..89324b6
--- /dev/null
+++ b/sensu/files/client.json
@@ -0,0 +1,71 @@
+{%- from "linux/map.jinja" import system with context -%}
+{%- from "linux/map.jinja" import network with context -%}
+{%- from "sensu/map.jinja" import client with context -%}
+{
+ "client": {
+ "name": "{{ system.name }}.{{ system.domain }}",
+ "hostname": "{{ system.name }}",
+ "fqdn": "{{ system.name }}.{{ system.domain }}",
+ "cluster": "{{ pillar.linux.system.cluster }}",
+ "graphite_name": "{{ system.name|replace('.', '_') }}_{{ system.domain|replace('.', '_') }}",
+ "address": "{% if grains.ipv4[0] == '127.0.0.1' %}{{ grains.ipv4[1] }}{% else %}{{ grains.ipv4[0] }}{% endif %}",
+ {%- if pillar.get('mysql', {}).server is defined %}
+ {%- if pillar.mysql.server.enabled is defined %}
+ {%- from "mysql/map.jinja" import server with context %}
+ "mysql": {
+ "user": "{{ server.admin.user }}",
+ "password": "{{ server.admin.password }}",
+ "host": "{{ server.bind.address }}",
+ "port": "{{ server.bind.port }}"
+ },
+ {%- endif %}
+ {%- endif %}
+ {%- if pillar.get('galera', {}).cluster is defined %}
+ {%- from "mysql/map.jinja" import server with context %}
+ "mysql": {
+ "user": "{{ server.admin.user }}",
+ "password": "{{ server.admin.password }}",
+ "host": "{{ server.bind.address }}",
+ "port": "{{ server.bind.port }}"
+ },
+ {%- endif %}
+ {%- if pillar.get('keystone', {}).client is defined %}
+ {%- if pillar.keystone.client.enabled %}
+ {%- from "keystone/map.jinja" import client with context %}
+ "openstack": {
+ "tenant": "{{ client.server.tenant }}",
+ "user": "{{ client.server.user }}",
+ "password": "{{ client.server.password }}",
+ "host": "{{ client.server.host }}",
+ "port": "{{ client.server.public_port }}"
+ },
+ {%- endif %}
+ {%- endif %}
+ "keepalive": {
+ "thresholds": {
+ "warning": {{ client.get('keepalive_warning', '120') }},
+ "critical": {{ client.get('keepalive_critical', '180') }}
+ },
+ "handler": "default"
+ },
+ "subscriptions": [
+ {% for key in grains.roles %}
+ "local-{{ key|replace('.', '-') }}",
+ {%- endfor %}
+ {%- if pillar.sensu.server is defined %}
+ "remote-internet",
+ "remote-network",
+ {%- endif %}
+ "local-{{ grains.os|lower }}-system",
+ {%- if grains.os != grains.os_family %}
+ "local-{{ grains.os_family|lower }}-system",
+ {%- endif %}
+ {%- if pillar.linux is defined %}
+ "{{ pillar.linux.system.name|replace('.', '-') }}-{{ pillar.linux.system.domain|replace('.', '-') }}",
+ {%- else %}
+ "{{ pillar.system.name|replace('.', '-') }}",
+ {%- endif %}
+ "local-common"
+ ]
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/handlers/default.json b/sensu/files/handlers/default.json
new file mode 100644
index 0000000..1475f33
--- /dev/null
+++ b/sensu/files/handlers/default.json
@@ -0,0 +1,17 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{
+ "handlers": {
+ "default": {
+ "type": "set",
+ "handlers": [
+ {% for set in handler.set %}"{{ set }}"{% if not loop.last %}, {% endif %}{% endfor %}
+ ],
+ "severities": [
+ "ok",
+ "critical",
+ "warning",
+ "unknown"
+ ]
+ }
+ }
+}
diff --git a/sensu/files/handlers/flapjack.json b/sensu/files/handlers/flapjack.json
new file mode 100644
index 0000000..4dd6c46
--- /dev/null
+++ b/sensu/files/handlers/flapjack.json
@@ -0,0 +1,14 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{
+ "handlers": {
+ "flapjack": {
+ "host": "{{ handler.get('host', 'localhost') }}",
+ "port": {{ handler.get('port', '6379') }},
+ "db": "{{ handler.get('number', '0') }}",
+ "severities": [
+ "ok",
+ "critical"
+ ]
+ }
+ }
+}
diff --git a/sensu/files/handlers/graphite.json b/sensu/files/handlers/graphite.json
new file mode 100644
index 0000000..5b0fa7f
--- /dev/null
+++ b/sensu/files/handlers/graphite.json
@@ -0,0 +1,17 @@
+{
+ "handlers": {
+ "graphite": {
+ "type": "amqp",
+ "exchange": {
+ "type": "topic",
+ "name": "metrics_break_it",
+ "durable": "true"
+ },
+ "mutator": "only_check_output",
+ "severities": [
+ "ok",
+ "critical"
+ ]
+ }
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/handlers/graphite_event.json b/sensu/files/handlers/graphite_event.json
new file mode 100644
index 0000000..693fc58
--- /dev/null
+++ b/sensu/files/handlers/graphite_event.json
@@ -0,0 +1,9 @@
+{
+ "graphite_event": {
+ "server_uri": "https://graphite.example.com:443/events/",
+ "tags": [
+ "custom_tag_a",
+ "custom_tag_b"
+ ]
+ }
+}
diff --git a/sensu/files/handlers/handler_flapjack.json b/sensu/files/handlers/handler_flapjack.json
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/sensu/files/handlers/handler_flapjack.json
diff --git a/sensu/files/handlers/handler_mailer.json b/sensu/files/handlers/handler_mailer.json
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/sensu/files/handlers/handler_mailer.json
diff --git a/sensu/files/handlers/logstash.json b/sensu/files/handlers/logstash.json
new file mode 100644
index 0000000..810b3f8
--- /dev/null
+++ b/sensu/files/handlers/logstash.json
@@ -0,0 +1,14 @@
+{
+ "handlers": {
+ "logstash": {
+ "server": "redis.example.tld",
+ "port": 6379,
+ "list": "logstash",
+ "type": "sensu-logstash"
+ "severities": [
+ "ok",
+ "critical"
+ ]
+ }
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/handlers/mail.json b/sensu/files/handlers/mail.json
new file mode 100644
index 0000000..921ead1
--- /dev/null
+++ b/sensu/files/handlers/mail.json
@@ -0,0 +1,34 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{%- if handler_setting == "handler" %}
+{
+ "handlers": {
+ "mail": {
+ "type": "pipe",
+ "command": "/etc/sensu/handlers/mail.py"
+ }
+ }
+}
+{%- endif %}
+{%- if handler_setting == "config" %}
+{
+ "mail": {
+ {%- if handler.user is defined %}
+ "user": "{{ handler.user }}",
+ {%- endif %}
+ {%- if handler.password is defined %}
+ "password": "{{ handler.password }}",
+ {%- endif %}
+ {%- if handler.authentication is defined %}
+ "authentication": "{{ handler.authentication }}",
+ {%- endif %}
+ {%- if handler.get('encryption', 'none') == 'ssl' %}
+ "encryption": "{{ handler.get('encryption') }}",
+ {%- endif %}
+ "subject": "Sensu Alert",
+ "from": "sensu@{{ handler.get('domain', 'example.org') }}",
+ "to": "{{ handler.mail_to }}",
+ "host": "{{ handler.get('host', 'localhost') }}",
+ "port": "{{ handler.get('port', '25') }}"
+ }
+}
+{%- endif %}
diff --git a/sensu/files/handlers/mailer.json b/sensu/files/handlers/mailer.json
new file mode 100644
index 0000000..3c95de0
--- /dev/null
+++ b/sensu/files/handlers/mailer.json
@@ -0,0 +1,35 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{%- if handler_setting == "handler" %}
+{
+ "handlers": {
+ "mailer": {
+ "type": "pipe",
+ "command": "/etc/sensu/handlers/mailer.rb"
+ }
+ }
+}
+{%- endif %}
+{%- if handler_setting == "config" %}
+{
+ "mailer": {
+ "admin_gui": "http://admin.example.com:8080/",
+ "mail_from": "sensu@{{ handler.get('domain', 'example.org') }}",
+ "mail_to": "{{ handler.mail_to }}",
+ "smtp_address": "{{ handler.get('host', 'localhost') }}",
+ "smtp_port": "{{ handler.get('port', '25') }}",
+ "smtp_domain": "{{ handler.get('domain', 'example.org') }}"
+ {%- if handler.user is defined %}
+ ,"smtp_username": "{{ handler.user }}"
+ {%- endif %}
+ {%- if handler.password is defined %}
+ ,"smtp_password": "{{ handler.password }}"
+ {%- endif %}
+ {%- if handler.authentication is defined %}
+ ,"smtp_authentication": "{{ handler.authentication }}"
+ {%- endif %}
+ {%- if handler.get('encryption', 'none') == 'ssl' %}
+ ,"smtp_enable_starttls_auto": true
+ {%- endif %}
+ }
+}
+{%- endif %}
diff --git a/sensu/files/handlers/sccd.json b/sensu/files/handlers/sccd.json
new file mode 100644
index 0000000..edfac2b
--- /dev/null
+++ b/sensu/files/handlers/sccd.json
@@ -0,0 +1,45 @@
+{%- for handler in pillar.sensu.server.handlers %}
+{%- if handler.name == handler_name %}
+{%- if handler_setting == "handler" %}
+{
+ "handlers": {
+ "sccd": {
+ "type": "pipe",
+ {%- if handler.mutator is defined %}
+ "mutator": "{{ handler.mutator }}",
+ {%- endif %}
+ "command": "/etc/sensu/handlers/sccd.py"
+ }
+ }
+}
+{%- endif %}
+{%- if handler_setting == "config" %}
+{
+ "sccd": {
+ {%- if handler.mail_user is defined %}
+ "mail_user": "{{ handler.mail_user }}",
+ {%- endif %}
+ {%- if handler.mail_password is defined %}
+ "mail_password": "{{ handler.mail_password }}",
+ {%- endif %}
+ {%- if handler.mail_authentication is defined %}
+ "mail_authentication": "{{ handler.mail_authentication }}",
+ {%- endif %}
+ {%- if handler.get('mail_encryption', 'none') == 'ssl' %}
+ "mail_encryption": "{{ handler.get('mail_encryption') }}",
+ {%- endif %}
+ "mail_subject": "SCCD Incident",
+ "mail_from": "{{ handler.mail_user }}",
+ "mail_host": "{{ handler.get('mail_host', 'localhost') }}",
+ "mail_port": "{{ handler.get('mail_port', '25') }}",
+ {%- if handler.kedb_host is defined %}
+ "kedb_host": "{{ handler.get('kedb_host', 'localhost') }}",
+ "kedb_port": "{{ handler.get('kedb_port', '6754') }}",
+ {%- endif %}
+ "sccd_email": "{{ handler.sccd_email }}",
+ "sccd_site": "{{ handler.sccd_site }}"
+ }
+}
+{%- endif %}
+{%- endif %}
+{%- endfor %}
diff --git a/sensu/files/handlers/slack.json b/sensu/files/handlers/slack.json
new file mode 100644
index 0000000..2ec633c
--- /dev/null
+++ b/sensu/files/handlers/slack.json
@@ -0,0 +1,38 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{%- if handler_setting == "handler" %}
+{
+ "handlers": {
+ "slack": {
+ "type": "pipe",
+ {%- if handler.mutator is defined %}
+ "mutator": "{{ handler.mutator }}",
+ {%- endif %}
+ "command": "/etc/sensu/handlers/slack.rb",
+ "severities": [
+ "ok",
+ "critical",
+ "warning",
+ "unknown"
+ ]
+ }
+ }
+}
+{%- endif %}
+{%- if handler_setting == "config" %}
+{
+ "slack": {
+ "webhook_url": "{{ handler.webhook_url }}",
+ "channel": "{{ handler.channel }}",
+ "markdown_enabled": false
+ }
+}
+{%- endif %}
+{#
+ "message_prefix": "optional prefix - can be used for mentions",
+ "surround": "optional - can be used for bold(*), italics(_), code(`) and preformatted(```)",
+ "bot_name": "optional bot name, defaults to slack defined",
+ "proxy_addr": "optional - your proxy address for http proxy, like squid, i.e. 192.168.10.100",
+ "proxy_port": "optional - should be port used by proxy, i.e. 3128",
+ }
+}
+#}
diff --git a/sensu/files/handlers/statsd.json b/sensu/files/handlers/statsd.json
new file mode 100644
index 0000000..6eeca0a
--- /dev/null
+++ b/sensu/files/handlers/statsd.json
@@ -0,0 +1,29 @@
+{%- set handler = pillar.sensu.server.handler[handler_name] %}
+{%- if handler_setting == "handler" %}
+{
+ "handlers": {
+ "statsd": {
+ "type": "pipe",
+ {%- if handler.mutator is defined %}
+ "mutator": "{{ handler.mutator }}",
+ {%- endif %}
+ "command": "/etc/sensu/handlers/statsd.py",
+ "severities": [
+ "ok",
+ "critical",
+ "warning",
+ "unknown"
+ ]
+ }
+ }
+}
+{%- endif %}
+{%- if handler_setting == "config" %}
+{
+ "statsd": {
+ "host": "{{ handler.get('host', 'localhost') }}",
+ "port": "{{ handler.get('port', '8125') }}",
+ "prefix": "{{ handler.get('prefix', 'mon') }}"
+ }
+}
+{%- endif %}
diff --git a/sensu/files/handlers/stdout.json b/sensu/files/handlers/stdout.json
new file mode 100644
index 0000000..e87e21c
--- /dev/null
+++ b/sensu/files/handlers/stdout.json
@@ -0,0 +1,8 @@
+{
+ "handlers": {
+ "stdout": {
+ "type": "pipe",
+ "command": "cat"
+ }
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/mutator.json b/sensu/files/mutator.json
new file mode 100644
index 0000000..9f212ca
--- /dev/null
+++ b/sensu/files/mutator.json
@@ -0,0 +1,11 @@
+{%- for mutator in pillar.sensu.server.mutators %}
+{%- if mutator_name == mutator.name %}
+{
+ "mutators": {
+ "{{ mutator.name }}": {
+ "command": "{{ mutator.command }}"
+ }
+ }
+}
+{%- endif %}
+{%- endfor %}
diff --git a/sensu/files/plugins/handlers/notification/flapjack.rb b/sensu/files/plugins/handlers/notification/flapjack.rb
new file mode 100644
index 0000000..43f5e2d
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/flapjack.rb
@@ -0,0 +1,84 @@
+# Sends events to Flapjack for notification routing. See http://flapjack.io/
+#
+# This extension requires Flapjack >= 0.8.7
+#
+# In order for Flapjack to keep its entities up to date, it is necessary to set
+# metric to "true" for each check that is using the flapjack handler extension.
+#
+# Here is an example of what the Sensu configuration for flapjack should
+# look like, assuming your Flapjack's redis service is running on the
+# same machine as the Sensu server:
+#
+# {
+# "flapjack": {
+# "host": "localhost",
+# "port": 6379,
+# "db": "0"
+# }
+# }
+#
+# Copyright 2014 Jive Software and contributors.
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE for details.
+
+module Sensu
+ module Extension
+ class Flapjack < Handler
+ def name
+ 'flapjack'
+ end
+
+ def description
+ 'sends sensu events to the flapjack redis queue'
+ end
+
+ def options
+ return @options if @options
+ @options = {
+ :host => '127.0.0.1',
+ :port => 6379,
+ :channel => 'events',
+ :db => 0
+ }
+ if @settings[:flapjack].is_a?(Hash)
+ @options.merge!(@settings[:flapjack])
+ end
+ @options
+ end
+
+ def post_init
+ @redis = Sensu::Redis.connect(options)
+ @redis.on_error do |error|
+ @logger.warn('Flapjack Redis instance not available on ' + options[:host])
+ end
+ end
+
+ def run(event_data)
+ event = Oj.load(event_data)
+ client = event[:client]
+ check = event[:check]
+ tags = []
+ tags << client[:environment] unless client[:environment].nil?
+ unless check[:subscribers].nil? || check[:subscribers].empty?
+ tags.concat(client[:subscriptions] - (client[:subscriptions] - check[:subscribers]))
+ else
+ tags.concat(client[:subscriptions])
+ end
+ details = ['Address:' + client[:address]]
+ details << 'Tags:' + tags.join(',')
+ flapjack_event = {
+ :entity => client[:name],
+ :check => check[:name],
+ :type => 'service',
+ :state => Sensu::SEVERITIES[check[:status]] || 'unknown',
+ :summary => check[:output],
+ :details => details.join(' '),
+ :time => check[:executed],
+ :tags => tags
+ }
+ @redis.lpush(options[:channel], Oj.dump(flapjack_event))
+ yield 'sent an event to the flapjack redis queue', 0
+ end
+ end
+ end
+end
diff --git a/sensu/files/plugins/handlers/notification/graphite_event.rb b/sensu/files/plugins/handlers/notification/graphite_event.rb
new file mode 100644
index 0000000..03d0bfc
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/graphite_event.rb
@@ -0,0 +1,79 @@
+#!/usr/bin/env ruby
+#
+# Sensu - Graphite Event Handler
+#
+# This handler takes events and POSTs them to a graphite events URI.
+#
+# For configuration see: graphite_event.json
+#
+# See here for more details:
+#
+# * https://code.launchpad.net/~lucio.torre/graphite/add-events/+merge/69142
+#
+# Author: Rob Wilson <roobert@gmail.com>
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-handler'
+require 'net/http'
+require 'net/https'
+require 'uri'
+require 'json'
+
+class GraphiteEvent < Sensu::Handler
+ def post_event(uri, body)
+ uri = URI.parse(uri)
+ req = Net::HTTP::Post.new(uri.path)
+ sock = Net::HTTP.new(uri.host, uri.port)
+ sock.use_ssl = true
+ req.body = body
+
+ req.basic_auth(uri.user, uri.password) if uri.user
+
+ sock.start { |http| http.request(req) }
+ end
+
+ def event_status
+ case @event['check']['status']
+ when 0
+ 'ok'
+ when 1
+ 'warning'
+ when 2
+ 'critical'
+ else
+ 'unknown'
+ end
+ end
+
+ def handle
+ tags = [
+ 'sensu',
+ 'event',
+ event_status,
+ @event['client']['name'],
+ @event['check']['name']
+ ]
+
+ tags += settings['graphite_event']['tags'] if settings['graphite_event']['tags']
+
+ body = {
+ 'what' => 'sensu_event',
+ 'tags' => tags.join(','),
+ 'data' => event_status,
+ 'when' => Time.now.to_i
+ }
+
+ uri = settings['graphite_event']['server_uri']
+
+ begin
+ post_event(uri, body.to_json)
+ rescue => e
+ bail "failed to send event to #{uri}: #{e}"
+ end
+
+ puts "sent event to graphite: #{body.to_json}"
+ end
+end
diff --git a/sensu/files/plugins/handlers/notification/kedb.py b/sensu/files/plugins/handlers/notification/kedb.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/kedb.py
diff --git a/sensu/files/plugins/handlers/notification/logstash.rb b/sensu/files/plugins/handlers/notification/logstash.rb
new file mode 100644
index 0000000..07c49dd
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/logstash.rb
@@ -0,0 +1,59 @@
+#!/usr/bin/env ruby
+#
+# Sensu Logstash Handler
+#
+# Heavily inspired by (er, copied from) the GELF Handler written by
+# Joe Miller.
+#
+# Designed to take sensu events, transform them into logstash JSON events
+# and ship them to a redis server for logstash to index. This also
+# generates a tag with either 'sensu-ALERT' or 'sensu-RECOVERY' so that
+# searching inside of logstash can be a little easier.
+#
+# Written by Zach Dunn -- @SillySophist or http://github.com/zadunn
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-handler'
+require 'redis'
+require 'json'
+require 'socket'
+require 'time'
+
+class LogstashHandler < Sensu::Handler
+
+ def event_name
+ @event['client']['name'] + '/' + @event['check']['name']
+ end
+
+ def action_to_string
+ @event['action'].eql?('resolve') ? "RESOLVE" : "ALERT"
+ end
+
+ def handle
+ redis = Redis.new(:host => settings['logstash']['server'], :port => settings['logstash']['port'])
+ time = Time.now.utc.iso8601
+ logstash_msg = {
+ :@timestamp => time,
+ :@version => 1,
+ :source => ::Socket.gethostname,
+ :tags => ["sensu-#{action_to_string}"],
+ :message => @event['check']['output'],
+ :host => @event['client']['name'],
+ :timestamp => @event['check']['issued'],
+ :address => @event['client']['address'],
+ :check_name => @event['check']['name'],
+ :command => @event['check']['command'],
+ :status => @event['check']['status'],
+ :flapping => @event['check']['flapping'],
+ :occurrences => @event['occurrences'],
+ :action => @event['action']
+ }
+ logstash_msg[:type] = settings['logstash']['type'] if settings['logstash'].has_key?('type')
+ redis.lpush(settings['logstash']['list'], logstash_msg.to_json)
+ end
+end
diff --git a/sensu/files/plugins/handlers/notification/mail.py b/sensu/files/plugins/handlers/notification/mail.py
new file mode 100644
index 0000000..2c43a3b
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/mail.py
@@ -0,0 +1,69 @@
+#!/usr/bin/env python
+import sys
+import smtplib
+from optparse import OptionParser
+import email
+from email.mime.text import MIMEText
+import json
+from datetime import datetime
+import time
+try:
+ from sensu import Handler
+except ImportError:
+ print('You must have the sensu Python module i.e.: pip install sensu')
+ sys.exit(1)
+
+class MailHandler(Handler):
+ def handle(self):
+ subj = self.settings.get('mail', {}).get('subject', 'Sensu Alert')
+ to = self.settings.get('mail', {}).get('to', 'root@localhost')
+ from_addr = self.settings.get('mail', {}).get('from', 'sensu@localhost')
+ host = self.settings.get('mail', {}).get('host', 'localhost')
+ port = self.settings.get('mail', {}).get('port', 25)
+ user = self.settings.get('mail', {}).get('user', None)
+ password = self.settings.get('mail', {}).get('password', None)
+ self.send(subj, to, from_addr, host, port, user, password)
+
+ def send(self, subj=None, to_addr=None, from_addr=None, host='localhost',
+ port=25, user=None, password=None):
+ # attempt to parse sensu message
+ try:
+ data = self.event
+ client_host = data.get('client', {}).get('name')
+ check_name = data.get('check', {}).get('name')
+ check_action = data.get('action')
+ timestamp = data.get('check', {}).get('issued')
+ check_date = datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+ parts = (
+ 'Date: {0}'.format(check_date),
+ 'Host: {0}'.format(client_host),
+ 'Address: {0}'.format(data.get('client', {}).get('address')),
+ 'Action: {0}'.format(check_action),
+ 'Name: {0}'.format(check_name),
+ 'Command: {0}'.format(data.get('check', {}).get('command')),
+ 'Output: {0}'.format(data.get('check', {}).get('output')),
+ )
+ text = '\n'.join(parts)
+ subj = '{0} [{1}: {2} ({3})]'.format(subj, client_host, check_name, check_action)
+ except Exception, e:
+ text = str(e)
+ msg = MIMEText(text)
+ msg['Subject'] = subj
+ msg['To'] = to_addr
+ msg['From'] = from_addr
+ msg['Date'] = email.utils.formatdate(time.time(), localtime=True)
+ #s = smtplib.SMTP(host, int(port))
+
+ if self.settings.get('mail', {}).get('encryption', None) == 'ssl':
+ s = smtplib.SMTP_SSL(host, int(port))
+ else:
+ s = smtplib.SMTP(host, int(port))
+ s.set_debuglevel(True)
+ if user:
+ s.login(user, password)
+
+ s.sendmail(from_addr, [to_addr], msg.as_string())
+ s.quit()
+
+if __name__=='__main__':
+ m = MailHandler()
+ sys.exit(0)
diff --git a/sensu/files/plugins/handlers/notification/mailer.rb b/sensu/files/plugins/handlers/notification/mailer.rb
new file mode 100644
index 0000000..c591186
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/mailer.rb
@@ -0,0 +1,105 @@
+#!/usr/bin/env ruby
+#
+# Sensu Handler: mailer
+#
+# This handler formats alerts as mails and sends them off to a pre-defined recipient.
+#
+# Copyright 2012 Pal-Kristian Hamre (https://github.com/pkhamre | http://twitter.com/pkhamre)
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+
+require 'rubygems' if RUBY_VERSION < '1.9.0'
+require 'sensu-handler'
+gem 'mail', '~> 2.5.4'
+require 'mail'
+require 'timeout'
+
+# patch to fix Exim delivery_method: https://github.com/mikel/mail/pull/546
+module ::Mail
+ class Exim < Sendmail
+ def self.call(path, arguments, destinations, encoded_message)
+ popen "#{path} #{arguments}" do |io|
+ io.puts encoded_message.to_lf
+ io.flush
+ end
+ end
+ end
+end
+
+class Mailer < Sensu::Handler
+ def short_name
+ @event['client']['name'] + '/' + @event['check']['name']
+ end
+
+ def action_to_string
+ @event['action'].eql?('resolve') ? "RESOLVED" : "ALERT"
+ end
+
+ def handle
+ admin_gui = settings['mailer']['admin_gui'] || 'http://localhost:8080/'
+ mail_to = settings['mailer']['mail_to']
+ mail_from = settings['mailer']['mail_from']
+
+ delivery_method = settings['mailer']['delivery_method'] || 'smtp'
+ smtp_address = settings['mailer']['smtp_address'] || 'localhost'
+ smtp_port = settings['mailer']['smtp_port'] || '25'
+ smtp_domain = settings['mailer']['smtp_domain'] || 'localhost.localdomain'
+
+ smtp_username = settings['mailer']['smtp_username'] || nil
+ smtp_password = settings['mailer']['smtp_password'] || nil
+ smtp_authentication = settings['mailer']['smtp_authentication'] || :plain
+ smtp_enable_starttls_auto = settings['mailer']['smtp_enable_starttls_auto'] == "false" ? false : true
+
+ playbook = "Playbook: #{@event['check']['playbook']}" if @event['check']['playbook']
+ body = <<-BODY.gsub(/^\s+/, '')
+ #{@event['check']['output']}
+ Admin GUI: #{admin_gui}
+ Host: #{@event['client']['name']}
+ Timestamp: #{Time.at(@event['check']['issued'])}
+ Address: #{@event['client']['address']}
+ Check Name: #{@event['check']['name']}
+ Command: #{@event['check']['command']}
+ Status: #{@event['check']['status']}
+ Occurrences: #{@event['occurrences']}
+ #{playbook}
+ BODY
+ subject = "#{action_to_string} - #{short_name}: #{@event['check']['notification']}"
+
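+    # build the mail gem delivery options (smtp by default) from the mailer settings;
+    # authentication options are only merged in when a username is configured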
+ Mail.defaults do
+ delivery_options = {
+ :address => smtp_address,
+ :port => smtp_port,
+ :domain => smtp_domain,
+ :openssl_verify_mode => 'none',
+ :enable_starttls_auto => smtp_enable_starttls_auto
+ }
+
+ unless smtp_username.nil?
+ auth_options = {
+ :user_name => smtp_username,
+ :password => smtp_password,
+ :authentication => smtp_authentication
+ }
+ delivery_options.merge! auth_options
+ end
+
+ delivery_method delivery_method.intern, delivery_options
+ end
+
+ begin
+ timeout 10 do
+ Mail.deliver do
+ to mail_to
+ from mail_from
+ subject subject
+ body body
+ end
+
+ puts 'mail -- sent alert for ' + short_name + ' to ' + mail_to.to_s
+ end
+ rescue Timeout::Error
+ puts 'mail -- timed out while attempting to ' + @event['action'] + ' an incident -- ' + short_name
+ end
+ end
+end
diff --git a/sensu/files/plugins/handlers/notification/sccd.py b/sensu/files/plugins/handlers/notification/sccd.py
new file mode 100644
index 0000000..a41aebb
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/sccd.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python
+
+import sys
+import smtplib
+import requests
+import json
+from optparse import OptionParser
+from email.mime.text import MIMEText
+from datetime import datetime
+
+try:
+ from sensu import Handler
+except ImportError:
+ print('You must have the sensu Python module i.e.: pip install sensu')
+ sys.exit(1)
+
+class SccdHandler(Handler):
+
+ def handle(self):
+ mail_subj = self.settings.get('sccd', {}).get('mail_subject', 'Sensu Alert')
+ mail_to = self.settings.get('sccd', {}).get('sccd_email', 'root@localhost')
+ mail_from_addr = self.settings.get('sccd', {}).get('mail_user', 'sensu@localhost')
+ mail_host = self.settings.get('sccd', {}).get('mail_host', 'localhost')
+ mail_port = self.settings.get('sccd', {}).get('mail_port', 25)
+ mail_user = self.settings.get('sccd', {}).get('mail_user', None)
+ mail_password = self.settings.get('sccd', {}).get('mail_password', None)
+ print 'SENSU_EVENT=========================='
+ print self.event
+ self.check_kedb()
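+        # mail SCCD only for the first occurrence of a new ('create') event,
+        # plus any non-create action such as resolve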
+        if self.event.get('occurrences') < 2 and self.event.get('action') == 'create':
+ self.send_mail(mail_subj, mail_to, mail_from_addr, mail_host, mail_port, mail_user, mail_password)
+ else:
+ if self.event.get('action') != 'create':
+ self.send_mail(mail_subj, mail_to, mail_from_addr, mail_host, mail_port, mail_user, mail_password)
+
+ def check_kedb(self):
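+        # enrich the event by POSTing it to the KEDB (known error database) service
+        # and replacing self.event with the returned payload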
+ host = self.settings.get('sccd', {}).get('kedb_host', 'localhost')
+ port = self.settings.get('sccd', {}).get('kedb_port', 25)
+ url = 'http://%s:%s/handle/' % (host, port)
+ print 'URL============================='
+ print url
+ payload = {
+ 'event': self.event,
+ }
+ print 'PAYLOAD============================='
+ print payload
+ response = requests.post(url, data=json.dumps(payload))
+ print 'RESPONSE============================='
+ print response
+ print 'RESPONSE-DATA=========================='
+ self.event = response.json()
+ print self.event
+# return data
+
+ def send_mail(self, subj=None, to_addr=None, from_addr=None, host='localhost',
+ port=25, user=None, password=None):
+ # attempt to parse sensu message
+ try:
+ data = self.event
+ client_host = data.get('client', {}).get('name')
+ check_name = data.get('check', {}).get('name')
+ check_action = data.get('action')
+ timestamp = data.get('check', {}).get('issued')
+ check_date = datetime.fromtimestamp(int(timestamp)).strftime('%Y-%m-%d %H:%M:%S')
+ if data.get('known_error'):
+ template_id = 'CL-%s-%s' % (data.get('level'), data.get('severity'))
+ applies_to = 'incident'
+ else:
+ template_id = 'CL-L2-INT'
+ applies_to = 'incident'
+ parts = (
+ '<MAXIMOEMAILCONTENT>',
+ ' <LSNRACTION>%s</LSNRACTION>' % check_action.upper(),
+ ' <LSNRAPPLIESTO>%s</LSNRAPPLIESTO>' % applies_to.upper(),
+ ' <TICKETID><![CDATA[&AUTOKEY&]]></TICKETID>',
+ ' <CLASS>%s</CLASS>' % applies_to.upper(),
+ ' <DESCRIPTION>%s @ %s</DESCRIPTION>' % (check_name, client_host),
+ ' <DESCRIPTION_LONGDESCRIPTION>%s: %s</DESCRIPTION_LONGDESCRIPTION>' % (check_date, data.get('check', {}).get('output')),
+ ' <TEMPLATEID>%s</TEMPLATEID>' % template_id.upper(),
+ ' <SITEID>%s</SITEID>' % self.settings.get('sccd', {}).get('sccd_site', 'default_site'),
+ '</MAXIMOEMAILCONTENT>',
+ )
+ text = '\n'.join(parts)
+ subj = '{0} [{1}: {2} ({3})]'.format(subj, client_host, check_name, check_action)
+        except Exception as e:
+ text = str(e)
+
+ print 'TEXT============================='
+ print text
+
+ msg = MIMEText(text)
+ msg['Subject'] = subj
+ msg['To'] = to_addr
+ msg['From'] = from_addr
+
+ if self.settings.get('sccd', {}).get('mail_encryption', None) == 'ssl':
+ s = smtplib.SMTP_SSL(host, int(port))
+ else:
+ s = smtplib.SMTP(host, int(port))
+ s.set_debuglevel(True)
+
+ if user:
+ s.login(user, password)
+
+ s.sendmail(from_addr, [to_addr], msg.as_string())
+ print s
+ s.quit()
+
+if __name__=='__main__':
+ m = SccdHandler()
+ sys.exit(0)
diff --git a/sensu/files/plugins/handlers/notification/slack.rb b/sensu/files/plugins/handlers/notification/slack.rb
new file mode 100644
index 0000000..b80c7d0
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/slack.rb
@@ -0,0 +1,132 @@
+#!/usr/bin/env ruby
+
+# Copyright 2014 Dan Shultz and contributors.
+#
+# Released under the same terms as Sensu (the MIT license); see LICENSE
+# for details.
+#
+# In order to use this plugin, you must first configure an incoming webhook
+# integration in slack. You can create the required webhook by visiting
+# https://{your team}.slack.com/services/new/incoming-webhook
+#
+# After you configure your webhook, you'll need the webhook URL from the integration.
+
+require 'sensu-handler'
+require 'json'
+
+class Slack < Sensu::Handler
+ option :json_config,
+ description: 'Configuration name',
+ short: '-j JSONCONFIG',
+ long: '--json JSONCONFIG',
+ default: 'slack'
+
+ def slack_webhook_url
+ get_setting('webhook_url')
+ end
+
+ def slack_channel
+ get_setting('channel')
+ end
+
+ def slack_proxy_addr
+ get_setting('proxy_addr')
+ end
+
+ def slack_proxy_port
+ get_setting('proxy_port')
+ end
+
+ def slack_message_prefix
+ get_setting('message_prefix')
+ end
+
+ def slack_bot_name
+ get_setting('bot_name')
+ end
+
+ def slack_surround
+ get_setting('surround')
+ end
+
+  def markdown_enabled
+    setting = get_setting('markdown_enabled')
+    setting.nil? ? true : setting
+  end
+
+ def incident_key
+ @event['client']['name'] + '/' + @event['check']['name']
+ end
+
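+  # read a value from the JSON config section selected by -j/--json (default: 'slack')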
+ def get_setting(name)
+ settings[config[:json_config]][name]
+ end
+
+ def handle
+ description = @event['check']['notification'] || build_description
+ post_data("*Check*\n#{incident_key}\n\n*Description*\n#{description}")
+ end
+
+ def build_description
+ [
+ @event['check']['output'].strip,
+ @event['client']['address'],
+ @event['client']['subscriptions'].join(',')
+ ].join(' : ')
+ end
+
+ def post_data(notice)
+ uri = URI(slack_webhook_url)
+
+    if slack_proxy_addr.nil?
+      http = Net::HTTP.new(uri.host, uri.port)
+    else
+      http = Net::HTTP::Proxy(slack_proxy_addr, slack_proxy_port).new(uri.host, uri.port)
+    end
+
+ http.use_ssl = true
+
+ req = Net::HTTP::Post.new("#{uri.path}?#{uri.query}")
+ text = slack_surround ? slack_surround + notice + slack_surround : notice
+ req.body = payload(text).to_json
+
+ response = http.request(req)
+ verify_response(response)
+ end
+
+ def verify_response(response)
+ case response
+ when Net::HTTPSuccess
+ true
+ else
+ fail response.error!
+ end
+ end
+
+ def payload(notice)
+ {
+ icon_url: 'http://sensuapp.org/img/sensu_logo_large-c92d73db.png',
+ attachments: [{
+ text: [slack_message_prefix, notice].compact.join(' '),
+ color: color
+ }]
+ }.tap do |payload|
+ payload[:channel] = slack_channel if slack_channel
+ payload[:username] = slack_bot_name if slack_bot_name
+ payload[:attachments][0][:mrkdwn_in] = %w(text) if markdown_enabled
+ end
+ end
+
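+  # map the Sensu check status (0 OK, 1 warning, 2 critical, 3 unknown) to an attachment color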
+ def color
+ color = {
+ 0 => '#36a64f',
+ 1 => '#FFCC00',
+ 2 => '#FF0000',
+ 3 => '#6600CC'
+ }
+ color.fetch(check_status.to_i)
+ end
+
+ def check_status
+ @event['check']['status']
+ end
+end
diff --git a/sensu/files/plugins/handlers/notification/statsd.py b/sensu/files/plugins/handlers/notification/statsd.py
new file mode 100644
index 0000000..1eec960
--- /dev/null
+++ b/sensu/files/plugins/handlers/notification/statsd.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python
+import sys
+try:
+ from sensu import Handler
+except ImportError:
+ print('You must have the sensu Python module i.e.: pip install sensu')
+ sys.exit(1)
+
+try:
+ import statsd
+except ImportError:
+ print('You must have the Statsd Python module i.e.: \
+ pip install python-statsd==1.6.0')
+ sys.exit(1)
+
+
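+# forward each handled event's check status to statsd as a gauge under the configured prefix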
+class StatsdHandler(Handler):
+
+ def handle(self):
+ statsd_connection = statsd.Connection(
+ host=self.settings.get('statsd').get('host', '127.0.0.1'),
+ port=self.settings.get('statsd').get('port', 8125),
+ sample_rate=self.settings.get('statsd').get('sample_rate', 1),
+ )
+ meter = statsd.Gauge(
+ self.settings.get('statsd').get('prefix', 'sensu'),
+ statsd_connection)
+
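+        # metric key is <client>.<check>, with dots in the names replaced by underscores
+        # so they do not create extra graphite path segments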
+ key = '{}.{}'.format(self.event['client']['name'].replace(
+ '.', '_'), self.event['check']['name'].replace('.', '_'))
+
+ meter.send(key, self.event['check']['status'])
+
+if __name__ == '__main__':
+ m = StatsdHandler()
+ sys.exit(0)
diff --git a/sensu/files/plugins/mutators/kedb.py b/sensu/files/plugins/mutators/kedb.py
new file mode 100644
index 0000000..0039013
--- /dev/null
+++ b/sensu/files/plugins/mutators/kedb.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import sys
+import requests
+import json
+
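+# Sensu mutator: read the event from stdin, POST it to the KEDB service for enrichment,
+# and print the (possibly updated) event to stdout for the handlers.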
+event = json.load(sys.stdin)
+
+#if event.get('occurences') == 1:
+
+host = event.get('kedb_host', 'localhost')
+port = event.get('kedb_port', 6754)
+url = 'http://%s:%s/handle/' % (host, port)
+#print 'URL============================='
+#print url
+payload = {
+ 'event': event,
+}
+
+file = open("/tmp/payload", "w")
+file.write(json.dumps(payload))
+file.close()
+
+#print 'PAYLOAD============================='
+#print payload
+response = requests.post(url, data=json.dumps(payload))
+#print 'RESPONSE============================='
+#print response
+#print 'RESPONSE-DATA=========================='
+
+file = open("/tmp/payload_response", "w")
+file.write(json.dumps(response.json()))
+file.close()
+
+if response.status_code == 200:
+ event = response.json()
+
+print json.dumps(event)
+
+sys.exit(0)
diff --git a/sensu/files/profile b/sensu/files/profile
new file mode 100644
index 0000000..3c8fa4c
--- /dev/null
+++ b/sensu/files/profile
@@ -0,0 +1,6 @@
+export PATH=$PATH:/usr/sensu
+{% if grains.osarch == 'amd64' %}
+export PATH=$PATH:/usr/lib64/nagios/plugins
+{% else %}
+export PATH=$PATH:/usr/lib/nagios/plugins
+{% endif %}
\ No newline at end of file
diff --git a/sensu/files/rabbitmq.json b/sensu/files/rabbitmq.json
new file mode 100644
index 0000000..c35ff1d
--- /dev/null
+++ b/sensu/files/rabbitmq.json
@@ -0,0 +1,21 @@
+{%- from "sensu/map.jinja" import server with context %}
+{%- from "sensu/map.jinja" import client with context %}
+{
+ {%- if pillar.sensu.server is defined %}
+ "rabbitmq": {
+ "host": "{{ server.message_queue.host }}",
+ "port": {{ server.message_queue.port }},
+ "user": "{{ server.message_queue.user }}",
+ "password": "{{ server.message_queue.password }}",
+ "vhost": "{{ server.message_queue.virtual_host }}"
+ }
+ {%- else %}
+ "rabbitmq": {
+ "host": "{{ client.message_queue.host }}",
+ "port": {{ client.message_queue.port }},
+ "user": "{{ client.message_queue.user }}",
+ "password": "{{ client.message_queue.password }}",
+ "vhost": "{{ client.message_queue.virtual_host }}"
+ }
+ {%- endif %}
+}
\ No newline at end of file
diff --git a/sensu/files/redis.json b/sensu/files/redis.json
new file mode 100644
index 0000000..5fbc088
--- /dev/null
+++ b/sensu/files/redis.json
@@ -0,0 +1,7 @@
+{%- from "sensu/map.jinja" import server with context -%}
+{
+ "redis": {
+ "host": "{{ server.database.host }}",
+ "port": {{ server.database.port }}
+ }
+}
\ No newline at end of file
diff --git a/sensu/files/sensu.conf b/sensu/files/sensu.conf
new file mode 100644
index 0000000..d9ce274
--- /dev/null
+++ b/sensu/files/sensu.conf
@@ -0,0 +1,25 @@
+{%- from "nginx/map.jinja" import server with context -%}
+local_sensu_server_proc:
+ command: "PATH=$PATH:/usr/lib64/nagios/plugins:/usr/lib/nagios/plugins check_procs -C sensu-server -u sensu -c 1:1"
+ interval: 60
+ occurrences: 1
+ subscribers:
+ - local-sensu-server
+local_sensu_api_proc:
+ command: "PATH=$PATH:/usr/lib64/nagios/plugins:/usr/lib/nagios/plugins check_procs -C sensu-api -u sensu -c 1:1"
+ interval: 60
+ occurrences: 1
+ subscribers:
+ - local-sensu-server
+local_sensu_client_proc:
+ command: "PATH=$PATH:/usr/lib64/nagios/plugins:/usr/lib/nagios/plugins check_procs -C sensu-client -u sensu -c 1:1"
+ interval: 60
+ occurrences: 1
+ subscribers:
+ - local-sensu-client
+local_sensu_dashboard_proc:
+ command: "PATH=$PATH:/usr/lib64/nagios/plugins:/usr/lib/nagios/plugins check_procs -C uchiwa -u uchiwa -c 1:1"
+ interval: 60
+ occurrences: 1
+ subscribers:
+ - local-sensu-dashboard
\ No newline at end of file
diff --git a/sensu/files/sudoer b/sensu/files/sudoer
new file mode 100644
index 0000000..68be5a0
--- /dev/null
+++ b/sensu/files/sudoer
@@ -0,0 +1,5 @@
+
+sensu ALL=(ALL) NOPASSWD: /usr/bin/contrail-status
+sensu ALL=(ALL) NOPASSWD: /etc/sensu/plugins/check_supervisor_proc.py
+sensu ALL=(ALL) NOPASSWD: /usr/bin/supervisorctl
+#
\ No newline at end of file
diff --git a/sensu/files/uchiwa.json b/sensu/files/uchiwa.json
new file mode 100644
index 0000000..54a2011
--- /dev/null
+++ b/sensu/files/uchiwa.json
@@ -0,0 +1,20 @@
+{%- from "sensu/map.jinja" import dashboard with context -%}
+{
+ "sensu": [
+ {
+ "name": "Sensu",
+ "host": "localhost",
+ "ssl": false,
+ "port": 4567,
+ "path": "",
+ "timeout": 5000
+ }
+ ],
+ "uchiwa": {
+ "host": "{{ dashboard.bind.address }}",
+ "port": {{ dashboard.bind.port }},
+ "user": "{{ dashboard.admin.username }}",
+ "pass": "{{ dashboard.admin.password }}",
+ "refresh": 5
+ }
+}
\ No newline at end of file
diff --git a/sensu/init.sls b/sensu/init.sls
new file mode 100644
index 0000000..d1122d4
--- /dev/null
+++ b/sensu/init.sls
@@ -0,0 +1,13 @@
+
+{%- if pillar.sensu is defined %}
+include:
+{%- if pillar.sensu.server is defined %}
+- sensu.server
+{%- endif %}
+{%- if pillar.sensu.client is defined %}
+- sensu.client
+{%- endif %}
+{%- if pillar.sensu.dashboard is defined %}
+- sensu.dashboard
+{%- endif %}
+{%- endif %}
diff --git a/sensu/map.jinja b/sensu/map.jinja
new file mode 100644
index 0000000..4f3a550
--- /dev/null
+++ b/sensu/map.jinja
@@ -0,0 +1,60 @@
+{% set services = [
+ 'billometer',
+ 'cinder',
+ 'ceilometer',
+ 'gitlab',
+ 'glance',
+ 'haproxy',
+ 'heat',
+ 'horizon',
+ 'kedb',
+ 'keystone',
+ 'linux',
+ 'memcached',
+ 'mysql',
+ 'nginx',
+ 'nova',
+ 'ntp',
+ 'opencontrail',
+ 'openssh',
+ 'postgresql',
+ 'rabbitmq',
+ 'salt',
+ 'sensu',
+ 'supervisor',
+] %}
+
+{% set server = salt['grains.filter_by']({
+ 'Debian': {
+ 'pkgs': ['sensu', 'python-pip'],
+ 'supported_services': services,
+ 'mine_checks': True,
+ },
+ 'RedHat': {
+ 'pkgs': ['sensu'],
+ 'supported_services': services,
+ 'mine_checks': True,
+ },
+}, merge=salt['pillar.get']('sensu:server')) %}
+
+{% set client = salt['grains.filter_by']({
+ 'Debian': {
+ 'pkgs': ['sensu', 'nagios-plugins-basic', 'curl'],
+ 'supported_services': services,
+ 'plugin': {}
+ },
+ 'RedHat': {
+ 'pkgs': ['sensu', 'nagios-plugins-ping', 'nagios-plugins-procs', 'nagios-plugins-load', 'nagios-plugins-swap', 'nagios-plugins-ntp', 'nagios-plugins-http', 'nagios-plugins-disk', 'net-snmp-utils', 'pynag'],
+ 'supported_services': services,
+ 'plugin': {}
+ },
+}, merge=salt['pillar.get']('sensu:client')) %}
+
+{% set dashboard = salt['grains.filter_by']({
+ 'Debian': {
+ 'pkgs': ['uchiwa',],
+ },
+ 'RedHat': {
+ 'pkgs': ['uchiwa'],
+ },
+}, merge=salt['pillar.get']('sensu:dashboard')) %}
diff --git a/sensu/server.sls b/sensu/server.sls
new file mode 100644
index 0000000..c4d5663
--- /dev/null
+++ b/sensu/server.sls
@@ -0,0 +1,152 @@
+{%- from "sensu/map.jinja" import server with context %}
+{%- if server.enabled %}
+
+include:
+- sensu._common
+
+sensu_server_packages:
+ pkg.installed:
+ - names: {{ server.pkgs }}
+ - require_in:
+ - file: /etc/sensu
+
+sensu_server_pip:
+ pip.installed:
+ - name: sensu
+ - require:
+ - pkg: sensu_server_packages
+
+{%- if server.mine_checks %}
+
+{%- set client_checks = {} %}
+
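+{#- collect sensu_checks published to the salt mine by clients (via grains.items) and render one check config per client/check #}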
+{%- for node_name, node_grains in salt['mine.get']('*', 'grains.items').iteritems() %}
+
+{%- set rowloop = loop %}
+
+{%- if node_grains.get('sensu_checks', {}) is not none %}
+
+{%- for check_name, check in node_grains.get('sensu_checks', {}).iteritems() %}
+
+/etc/sensu/conf.d/check_{{ check_name }}.json_{{ rowloop.index }}-{{ loop.index }}:
+ file.managed:
+ - name: /etc/sensu/conf.d/check_{{ check_name }}.json
+ - source: salt://sensu/files/check.json
+ - template: jinja
+ - defaults:
+ check_name: "{{ check_name }}"
+ check: {{ check|yaml }}
+ - require:
+ - pkg: sensu_server_packages
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+{%- endfor %}
+
+{%- endif %}
+
+{%- endfor %}
+
+{%- endif %}
+
+{%- for check in server.get('checks', []) %}
+
+/etc/sensu/conf.d/check_{{ check.name }}.json:
+ file.managed:
+ - source: salt://sensu/files/check_manual.json
+ - template: jinja
+ - defaults:
+ check_name: "{{ check.name }}"
+ - require:
+ - pkg: sensu_server_packages
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+{%- endfor %}
+
+{%- for mutator in server.get('mutators', []) %}
+
+/etc/sensu/conf.d/mutator_{{ mutator.name }}.json:
+ file.managed:
+ - source: salt://sensu/files/mutator.json
+ - template: jinja
+ - defaults:
+ mutator_name: "{{ mutator.name }}"
+ - require:
+ - file: /etc/sensu/config.json
+    - pkg: sensu_server_packages
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+{%- endfor %}
+
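+{#- render handler-specific states only for the handler types shipped with this formula #}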
+{%- for handler_name, handler in server.get('handler', {}).iteritems() %}
+
+{%- if handler_name in ['default', 'flapjack', 'mail', 'sccd', 'stdout', 'statsd', 'slack'] %}
+
+{%- include "sensu/server/_handler_"+handler_name+".sls" %}
+
+{%- endif %}
+
+{%- endfor %}
+
+/etc/sensu/conf.d/api.json:
+ file.managed:
+ - source: salt://sensu/files/api.json
+ - template: jinja
+ - mode: 644
+ - require:
+ - file: /etc/sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/conf.d/redis.json:
+ file.managed:
+ - source: salt://sensu/files/redis.json
+ - template: jinja
+ - mode: 644
+ - require:
+ - file: /etc/sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+service_sensu_server:
+ service.running:
+ - name: sensu-server
+ - enable: true
+
+service_sensu_api:
+ service.running:
+ - name: sensu-api
+ - enable: true
+
+/srv/sensu/handlers:
+ file.recurse:
+ - clean: true
+ - source: salt://sensu/files/plugins/handlers
+ - user: sensu
+ - group: sensu
+ - file_mode: 755
+ - dir_mode: 755
+ - makedirs: true
+ - require:
+ - file: /srv/sensu
+
+/srv/sensu/mutators:
+ file.recurse:
+ - clean: true
+ - source: salt://sensu/files/plugins/mutators
+ - user: sensu
+ - group: sensu
+ - file_mode: 755
+ - dir_mode: 755
+ - makedirs: true
+ - require:
+ - file: /srv/sensu
+
+{%- endif %}
\ No newline at end of file
diff --git a/sensu/server/_handler_default.sls b/sensu/server/_handler_default.sls
new file mode 100644
index 0000000..b298507
--- /dev/null
+++ b/sensu/server/_handler_default.sls
@@ -0,0 +1,10 @@
+
+/etc/sensu/conf.d/handler_{{ handler_name }}.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/{{ handler_name }}.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
diff --git a/sensu/server/_handler_flapjack.sls b/sensu/server/_handler_flapjack.sls
new file mode 100644
index 0000000..42c6288
--- /dev/null
+++ b/sensu/server/_handler_flapjack.sls
@@ -0,0 +1,19 @@
+
+/etc/sensu/conf.d/flapjack.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/{{ handler_name }}.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/extensions/handlers/flapjack.rb:
+ file.managed:
+ - source: salt://sensu/files/plugins/handlers/flapjack.rb
+ - mode: 660
+ - user: sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
diff --git a/sensu/server/_handler_mail.sls b/sensu/server/_handler_mail.sls
new file mode 100644
index 0000000..cbe98f8
--- /dev/null
+++ b/sensu/server/_handler_mail.sls
@@ -0,0 +1,31 @@
+
+/etc/sensu/conf.d/mail.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/mail.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "config"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/conf.d/handler_mail.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/mail.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "handler"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/handlers/mail.py:
+ file.managed:
+ - source: salt://sensu/files/plugins/handlers/notification/mail.py
+ - mode: 700
+ - user: sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
\ No newline at end of file
diff --git a/sensu/server/_handler_sccd.sls b/sensu/server/_handler_sccd.sls
new file mode 100644
index 0000000..7a6441c
--- /dev/null
+++ b/sensu/server/_handler_sccd.sls
@@ -0,0 +1,31 @@
+
+/etc/sensu/conf.d/sccd.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/sccd.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "config"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/conf.d/handler_sccd.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/sccd.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "handler"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/handlers/sccd.py:
+ file.managed:
+ - source: salt://sensu/files/plugins/handlers/notification/sccd.py
+ - mode: 700
+ - user: sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
\ No newline at end of file
diff --git a/sensu/server/_handler_slack.sls b/sensu/server/_handler_slack.sls
new file mode 100644
index 0000000..994e2d6
--- /dev/null
+++ b/sensu/server/_handler_slack.sls
@@ -0,0 +1,31 @@
+
+/etc/sensu/conf.d/slack.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/slack.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "config"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/conf.d/handler_slack.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/slack.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "handler"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/handlers/slack.rb:
+ file.managed:
+ - source: salt://sensu/files/plugins/handlers/notification/slack.rb
+ - mode: 700
+ - user: sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
\ No newline at end of file
diff --git a/sensu/server/_handler_statsd.sls b/sensu/server/_handler_statsd.sls
new file mode 100644
index 0000000..8412d5a
--- /dev/null
+++ b/sensu/server/_handler_statsd.sls
@@ -0,0 +1,37 @@
+
+python-statsd:
+ pip.installed:
+ - name: python-statsd == 1.6.0
+
+/etc/sensu/conf.d/statsd.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/statsd.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "config"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+ - require:
+ - pip: python-statsd
+
+/etc/sensu/conf.d/handler_statsd.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/statsd.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "handler"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
+
+/etc/sensu/handlers/statsd_handler.py:
+ file.managed:
+ - source: salt://sensu/files/plugins/handlers/notification/statsd.py
+ - mode: 700
+ - user: sensu
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api
\ No newline at end of file
diff --git a/sensu/server/_handler_stdout.sls b/sensu/server/_handler_stdout.sls
new file mode 100644
index 0000000..1371cb3
--- /dev/null
+++ b/sensu/server/_handler_stdout.sls
@@ -0,0 +1,11 @@
+
+/etc/sensu/conf.d/handler_stdout.json:
+ file.managed:
+ - source: salt://sensu/files/handlers/stdout.json
+ - template: jinja
+ - defaults:
+ handler_name: "{{ handler_name }}"
+ handler_setting: "handler"
+ - watch_in:
+ - service: service_sensu_server
+ - service: service_sensu_api