Merge "* Splitting pam modules: - ldap - mkhomedir"
diff --git a/.kitchen.yml b/.kitchen.yml
index a998cfd..c704da5 100644
--- a/.kitchen.yml
+++ b/.kitchen.yml
@@ -31,7 +31,7 @@
platforms:
- name: <%=ENV['PLATFORM'] || 'saltstack-ubuntu-xenial-salt-stable' %>
driver_config:
- image: <%=ENV['PLATFORM'] || 'epcim/salt-formulas:saltstack-ubuntu-xenial-salt-stable'%>
+ image: <%=ENV['PLATFORM'] || 'epcim/salt:saltstack-ubuntu-xenial-salt-stable'%>
platform: ubuntu
diff --git a/.travis.yml b/.travis.yml
index 4ba771d..78246a5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,14 +22,16 @@
- bundle install
env:
- - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=network
- - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=network
- # - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=storage
- # - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=storage
- - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=system
- - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=system
- # - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-bionic-salt-2017.7 SUITE=network
- # - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-bionic-salt-2017.7 SUITE=system
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2016.3 SUITE=network
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2016.3 SUITE=system
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2017.7 SUITE=network
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2017.7 SUITE=system
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2018.3 SUITE=network
+ - PLATFORM=epcim/salt:saltstack-ubuntu-xenial-salt-2018.3 SUITE=system
+ # - PLATFORM=epcim/salt:saltstack-ubuntu-bionic-salt-2017.7 SUITE=network
+ # - PLATFORM=epcim/salt:saltstack-ubuntu-bionic-salt-2017.7 SUITE=system
+ # - PLATFORM=epcim/salt:saltstack-ubuntu-bionic-salt-2018.3 SUITE=network
+ # - PLATFORM=epcim/salt:saltstack-ubuntu-bionic-salt-2018.3 SUITE=system
before_script:
- set -o pipefail
diff --git a/README.rst b/README.rst
index ace428b..2765990 100644
--- a/README.rst
+++ b/README.rst
@@ -603,7 +603,7 @@
size: 2M
count: 107520
mount_point: /mnt/hugepages_2MB
- mount: false/true # default false
+ mount: false/true # default is true (mount immediately) / false (just save in the fstab)
large:
default: true # default automatically mounted
size: 1G
@@ -918,6 +918,18 @@
logpath: "/var/log/atop"
outfile: "/var/log/atop/daily.log"
+Linux with mcelog service:
+
+.. code-block:: yaml
+
+ linux:
+ system:
+ mcelog:
+ enabled: true
+ logging:
+ syslog: true
+ syslog_error: true
+
RHEL / CentOS
^^^^^^^^^^^^^
diff --git a/linux/files/mcelog.conf b/linux/files/mcelog.conf
new file mode 100644
index 0000000..2b2302f
--- /dev/null
+++ b/linux/files/mcelog.conf
@@ -0,0 +1,199 @@
+{%- from "linux/map.jinja" import system with context %}
+{%- set mcelog = system.mcelog %}
+#
+# Example config file for mcelog
+# mcelog is the user space backend that decodes and processes machine check events
+# (cpu hardware errors) reported by the CPU to the kernel
+#
+
+# general format
+#optionname = value
+# white space is not allowed in value currently, except at the end where it is dropped
+#
+
+# In general all command line options that are not commands work here.
+# See man mcelog or mcelog --help for a list.
+# e.g. to enable the --no-syslog option use
+#no-syslog = yes (or no to disable)
+# when the option has an argument
+#logfile = /tmp/logfile
+# below are the options which are not command line options.
+
+# Set CPU type for which mcelog decodes events:
+#cpu = type
+# For valid values for type please see mcelog --help.
+# If this value is set incorrectly the decoded output will be likely incorrect.
+# By default when this parameter is not set mcelog uses the CPU it is running on
+# on very new kernels the mcelog events reported by the kernel also carry
+# the CPU type which is used too when available and not overridden.
+
+# Enable daemon mode:
+#daemon = yes
+# By default mcelog just processes the currently pending events and exits.
+# In daemon mode it will keep running as a daemon in the background and poll
+# the kernel for events and then decode them.
+
+# Filter out known broken events by default.
+filter = yes
+# Don't log memory errors individually.
+# They still get accounted if that is enabled.
+#filter-memory-errors = yes
+
+# output in undecoded raw format to be easier machine readable
+# (default is decoded).
+#raw = yes
+
+# Set CPU MHz to decode uptime from time stamp counter (output
+# unreliable, not needed on new kernels which report the event time
+# directly). A lot of systems don't have a linear time stamp clock
+# and the output is wrong then.
+# Normally mcelog tries to figure out if the TSC is reliable
+# and only uses the current frequency then.
+# Setting a frequency forces timestamp decoding.
+# This setting is obsolete with modern kernels which report the time
+# directly.
+#cpumhz = 1800.00
+
+# log output options
+# Log decoded machine checks in syslog (default stdout or syslog for daemon)
+#syslog = yes
+# Log decoded machine checks in syslog with error level
+#syslog-error = yes
+# Never log anything to syslog
+#no-syslog = yes
+# Append log output to logfile instead of stdout. Only when no syslog logging is active
+#logfile = filename
+
+{%- if mcelog.logging is defined %}
+
+{%- if mcelog.logging.syslog is defined %}
+syslog = {{ 'yes' if mcelog.logging.syslog else 'no' }}
+{%- endif %}
+{%- if mcelog.logging.syslog_error is defined %}
+syslog-error = {{ 'yes' if mcelog.logging.syslog_error else 'no' }}
+{%- endif %}
+{%- if mcelog.logging.no_syslog is defined %}
+no-syslog = {{ 'yes' if mcelog.logging.no_syslog else 'no' }}
+{%- endif %}
+{%- if mcelog.logging.logfile is defined %}
+logfile = {{ mcelog.logging.logfile }}
+{%- endif %}
+
+{%- endif %}
+# Use SMBIOS information to decode DIMMs (needs root).
+# This function is not recommended to use right now and generally not needed.
+# The exception is memdb prepopulation, which is configured separately below.
+#dmi = no
+
+# When in daemon mode run as this user after set up.
+# Note that the triggers will run as this user too.
+# Setting this to non root will mean that triggers cannot take some corrective
+# action, like offlining objects.
+#run-credentials-user = root
+
+# group to run as daemon with
+# default to the group of the run-credentials-user
+#run-credentials-group = nobody
+
+[server]
+# user allowed to access client socket.
+# when set to * match any
+# root is always allowed to access.
+# default: root only
+client-user = root
+# group allowed to access mcelog
+# When no group is configured any group matches (but still user checking).
+# when set to * match any
+#client-group = root
+# Path to the unix socket for client<->server communication.
+# When no socket-path is configured the server will not start
+#socket-path = /var/run/mcelog-client
+# When mcelog starts it checks if a server is already running. This configures the timeout
+# for this check.
+#initial-ping-timeout = 2
+#
+[dimm]
+# Is the in memory DIMM error tracking enabled?
+# Only works on systems with integrated memory controller and
+# which are supported.
+# Only takes effect in daemon mode.
+dimm-tracking-enabled = yes
+# Use DMI information from the BIOS to prepopulate DIMM database.
+# Note this might not work with all BIOS and requires mcelog to run as root.
+# Alternative is to let mcelog create DIMM objects on demand.
+dmi-prepopulate = yes
+#
+# Execute these triggers when the rate of corrected or uncorrected
+# Errors per DIMM exceeds the threshold.
+# Note when the hardware does not report DIMMs this might also
+# be per channel.
+# The default of 10/24h is reasonable for server quality
+# DDR3 DIMMs as of 2009/10.
+#uc-error-trigger = dimm-error-trigger
+uc-error-threshold = 1 / 24h
+#ce-error-trigger = dimm-error-trigger
+ce-error-threshold = 10 / 24h
+
+[socket]
+# Enable memory error accounting per socket.
+socket-tracking-enabled = yes
+
+# Threshold and trigger for uncorrected memory errors on a socket.
+# mem-uc-error-trigger = socket-memory-error-trigger
+
+mem-uc-error-threshold = 100 / 24h
+
+# Trigger script for corrected memory errors on a socket.
+mem-ce-error-trigger = socket-memory-error-trigger
+
+# Threshold on when to trigger a corrected error for the socket.
+
+mem-ce-error-threshold = 100 / 24h
+
+# Log socket error threshold explicitly?
+mem-ce-error-log = yes
+
+# Trigger script for uncorrected bus error events
+bus-uc-threshold-trigger = bus-error-trigger
+
+# Trigger script for uncorrected IOMCA errors
+iomca-threshold-trigger = iomca-error-trigger
+
+# Trigger script for other uncategorized errors
+unknown-threshold-trigger = unknown-error-trigger
+
+[cache]
+# Processing of cache error thresholds reported by Intel CPUs.
+cache-threshold-trigger = cache-error-trigger
+
+# Should cache threshold events be logged explicitly?
+cache-threshold-log = yes
+
+[page]
+# Memory error accounting per 4K memory page.
+# Threshold for the corrected memory errors trigger script.
+memory-ce-threshold = 10 / 24h
+
+# Trigger script for corrected errors.
+# memory-ce-trigger = page-error-trigger
+
+# Should page threshold events be logged explicitly?
+memory-ce-log = yes
+
+# specify the internal action in mcelog on exceeding a page error threshold
+# this is done in addition to executing the trigger script if available
+# off no action
+# account only account errors
+# soft try to soft-offline page without killing any processes
+# This requires an up-to-date kernel. Might not be successful.
+# hard try to hard-offline page by killing processes
+# Requires an up-to-date kernel. Might not be successful.
+# soft-then-hard First try to soft offline, then try hard offlining
+#memory-ce-action = off|account|soft|hard|soft-then-hard
+memory-ce-action = soft
+
+[trigger]
+# Maximum number of running triggers
+children-max = 2
+# execute triggers in this directory
+directory = /etc/mcelog
diff --git a/linux/files/openvswitch-switch.default b/linux/files/openvswitch-switch.default
new file mode 100644
index 0000000..06c769d
--- /dev/null
+++ b/linux/files/openvswitch-switch.default
@@ -0,0 +1,44 @@
+{%- from "linux/map.jinja" import network with context %}
+{%- set openvswitch = network.openvswitch %}
+# This is a POSIX shell fragment -*- sh -*-
+
+# FORCE_COREFILES: If 'yes' then core files will be enabled.
+# FORCE_COREFILES=yes
+
+# OVS_CTL_OPTS: Extra options to pass to ovs-ctl. This is, for example,
+# a suitable place to specify --ovs-vswitchd-wrapper=valgrind.
+# OVS_CTL_OPTS=
+
+# OVS_VSWITCHD_OPTS: Extra options to pass to ovs-ctl.
+# Options to start Open vSwitch daemon with.
+# Example: '-vconsole:dbg -vsyslog:dbg -vfile:dbg -vFACILITY:clock2'
+# OVS_VSWITCHD_OPTS=
+{%- if openvswitch.get('logging', {}).vswitchd is defined %}
+ {%- set _vswitchd_opts = [] %}
+ {%- for opt in ['console', 'file', 'syslog'] %}
+ {%- if openvswitch.logging.vswitchd.get(opt) %}
+ {%- do _vswitchd_opts.append("-v"+ opt + ":" + openvswitch.logging.vswitchd.get(opt)) %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if openvswitch.logging.vswitchd.facility is defined %}
+ {%- do _vswitchd_opts.append("-vFACILITY:" + openvswitch.logging.vswitchd.facility) %}
+ {%- endif %}
+OVS_VSWITCHD_OPTS="{{ ' '.join(_vswitchd_opts) }}"
+{%- endif %}
+
+# OVSDB_OPTS: Extra options to pass to ovs-ctl.
+# Options to start Open vSwitch DB daemon with.
+# Example: '-vconsole:dbg -vsyslog:dbg -vfile:dbg -vFACILITY:clock2'
+# OVSDB_OPTS=
+{%- if openvswitch.get('logging', {}).ovsdb is defined %}
+ {%- set _ovsdb_opts = [] %}
+ {%- for opt in ['console', 'file', 'syslog'] %}
+ {%- if openvswitch.logging.ovsdb.get(opt) %}
+ {%- do _ovsdb_opts.append("-v" + opt + ":" + openvswitch.logging.ovsdb.get(opt)) %}
+ {%- endif %}
+ {%- endfor %}
+ {%- if openvswitch.logging.ovsdb.facility is defined %}
+ {%- do _ovsdb_opts.append("-vFACILITY:" + openvswitch.logging.ovsdb.facility) %}
+ {%- endif %}
+OVSDB_OPTS="{{ ' '.join(_ovsdb_opts) }}"
+{%- endif %}
diff --git a/linux/files/setup-loopback-device.upstart b/linux/files/setup-loopback-device.upstart
index a1acd24..2b63ac7 100644
--- a/linux/files/setup-loopback-device.upstart
+++ b/linux/files/setup-loopback-device.upstart
@@ -1,12 +1,10 @@
description "Setup {{ device_name }} device"
start on filesystem
-task
-pre-start script
- if /sbin/losetup {{ device_name }}; then
- stop ; exit 0
- fi
+pre-start exec losetup {{ device_name }} {{ file }}
+post-stop exec losetup -d {{ device_name }}
+
+script
+ while losetup {{ device_name }} ; do sleep 60 ; done
end script
-
-exec losetup {{ device_name }} {{ file }}
diff --git a/linux/map.jinja b/linux/map.jinja
index 3973c7f..4236a14 100644
--- a/linux/map.jinja
+++ b/linux/map.jinja
@@ -274,8 +274,11 @@
'service': 'multipath'
},
},
-}, grain='os_family', merge=salt['pillar.get']('linux:storage')) %}
-
+}, merge=salt['grains.filter_by']({
+ 'trusty': {
+ 'lvm_services': ['udev'],
+ },
+}, grain='oscodename', merge=salt['pillar.get']('linux:storage'))) %}
{% set monitoring = salt['grains.filter_by']({
'default': {
diff --git a/linux/meta/fluentd.yml b/linux/meta/fluentd.yml
index 621a378..94b1fbc 100644
--- a/linux/meta/fluentd.yml
+++ b/linux/meta/fluentd.yml
@@ -109,4 +109,64 @@
tag: 'metric.**'
type: relabel
label: default_metric
+{%- else %}
+agent:
+ config:
+ label:
+ default_metric:
+ filter:
+ metric_hdd_errors_parse:
+ tag: metric.hdd_errors
+ type: parser
+ key_name: Payload
+ parser:
+ type: regexp
+ format: '/(?<device>[sv]d[a-z]+\d*)/'
+ metric_hdd_errors:
+ tag: metric.hdd_errors
+ require:
+ - metric_hdd_errors_parse
+ type: prometheus
+ metric:
+ - name: hdd_errors_total
+ type: counter
+ desc: The total number of hdd errors.
+ label:
+ - name: host
+ value: ${Hostname}
+ - name: device
+ value: ${device}
+ syslog:
+ input:
+ syslog_file:
+ type: tail
+ tag: linux.syslog
+ path: /var/log/syslog
+ pos_file: {{ pillar.fluentd.agent.dir.positiondb }}/linux_syslog.pos
+ suppress_parse_error_log: true
+ parser:
+ type: regexp
+ format: >-
+ '/(?<Payload>.*(?<device>[sv]d[a-z]{1,2}\d{0,3}).*)/'
+ match:
+ push_to_default:
+ tag: 'linux.**'
+ type: copy
+ store:
+ - type: relabel
+ label: default_output
+ - type: rewrite_tag_filter
+ rule:
+ - name: Payload
+ regexp: >-
+ 'error.*\b[sv]d[a-z]{1,2}\d{0,3}\b.*'
+ result: metric.hdd_errors
+ - name: Payload
+ regexp: >-
+ '\b[sv]d[a-z]{1,2}\d{0,3}\b.*error'
+ result: metric.hdd_errors
+ push_to_metric:
+ tag: 'metric.**'
+ type: relabel
+ label: default_metric
{%- endif %}
diff --git a/linux/network/init.sls b/linux/network/init.sls
index 56b05a5..8a7d458 100644
--- a/linux/network/init.sls
+++ b/linux/network/init.sls
@@ -16,6 +16,9 @@
{%- if network.systemd|length > 0 %}
- linux.network.systemd
{%- endif %}
+{%- if network.openvswitch is defined %}
+- linux.network.openvswitch
+{%- endif %}
{%- if network.interface|length > 0 %}
- linux.network.interface
{%- endif %}
diff --git a/linux/network/openvswitch.sls b/linux/network/openvswitch.sls
new file mode 100644
index 0000000..474a84c
--- /dev/null
+++ b/linux/network/openvswitch.sls
@@ -0,0 +1,26 @@
+{%- from "linux/map.jinja" import network with context %}
+
+{%- if network.get('openvswitch', {}).get('enabled', False) %}
+
+openvswitch_pkgs:
+ pkg.installed:
+ - pkgs: {{ network.ovs_pkgs }}
+
+/etc/default/openvswitch-switch:
+ file.managed:
+ - source: salt://linux/files/openvswitch-switch.default
+ - template: jinja
+ - require:
+ - pkg: openvswitch_pkgs
+
+openvswitch_switch_service:
+ service.running:
+ - name: openvswitch-switch
+ - enable: true
+ {%- if grains.get('noservices') %}
+ - onlyif: /bin/false
+ {%- endif %}
+ - watch:
+ - file: /etc/default/openvswitch-switch
+
+{%- endif %}
diff --git a/linux/system/hugepages.sls b/linux/system/hugepages.sls
index 1c43262..54ccf45 100644
--- a/linux/system/hugepages.sls
+++ b/linux/system/hugepages.sls
@@ -19,23 +19,22 @@
{%- for hugepages_type, hugepages in system.kernel.hugepages.items() %}
-{%- if hugepages.get('mount', False) or hugepages.get('default', False) %}
-
hugepages_mount_{{ hugepages_type }}:
mount.mounted:
- name: {{ hugepages.mount_point }}
- - device: Hugetlbfs-kvm
+ - device: Hugetlbfs-kvm-{{ hugepages.size|lower }}
- fstype: hugetlbfs
- mkmnt: true
- opts: mode=775,pagesize={{ hugepages.size }}
+ - mount: {{ hugepages.mount|default('true') }}
# Make hugepages available right away with a temporary systctl write
# This will be handled via krn args after reboot, so don't use `sysctl.present`
+{%- if hugepages.get('default', False) %}
hugepages_sysctl_vm_nr_hugepages:
cmd.run:
- name: "sysctl vm.nr_hugepages={{ hugepages.count }}"
- unless: "sysctl vm.nr_hugepages | grep -qE '{{ hugepages.count }}'"
-
{%- endif %}
{%- endfor %}
diff --git a/linux/system/init.sls b/linux/system/init.sls
index c1b13e4..ad3681a 100644
--- a/linux/system/init.sls
+++ b/linux/system/init.sls
@@ -117,3 +117,6 @@
{%- if system.banner is defined %}
- linux.system.banner
{%- endif %}
+{%- if system.mcelog is defined %}
+- linux.system.mcelog
+{%- endif %}
diff --git a/linux/system/mcelog.sls b/linux/system/mcelog.sls
new file mode 100644
index 0000000..c2d0fd4
--- /dev/null
+++ b/linux/system/mcelog.sls
@@ -0,0 +1,32 @@
+{%- from "linux/map.jinja" import system with context %}
+{%- if system.enabled %}
+
+{%- if system.get('mcelog',{}).get('enabled', False) %}
+
+mcelog_packages:
+ pkg.installed:
+ - name: mcelog
+
+mcelog_conf:
+ file.managed:
+ - name: /etc/mcelog/mcelog.conf
+ - source: salt://linux/files/mcelog.conf
+ - template: jinja
+ - user: root
+ - group: root
+ - mode: 644
+ - require:
+ - pkg: mcelog_packages
+
+mce_service:
+ service.running:
+ - name: mcelog
+ - enable: true
+ - require:
+ - pkg: mcelog_packages
+ - watch:
+ - file: mcelog_conf
+
+{%- endif %}
+
+{%- endif %}
diff --git a/tests/pillar/system.sls b/tests/pillar/system.sls
index eb6201e..8aeb9d7 100644
--- a/tests/pillar/system.sls
+++ b/tests/pillar/system.sls
@@ -369,3 +369,8 @@
interval: 20
logpath: "/var/mylog/atop"
outfile: "/var/mylog/atop/daily.log"
+ mcelog:
+ enabled: true
+ logging:
+ syslog: true
+ syslog_error: true