Fix wd restart behaviour
* run_wd_keepalive option - required, so that changes are reliably caught by
salt service reload\restart.
Set to 1 by default, at the service level.
* Add possibility to configure ping\load params
* Remove kernel module manipulations:
- Passing params to conf w/o reboot makes no sense
- Params should also be passed to sysfs, if needed.
These functions should be done via linux-formula
Readme updated
* Misc: fix schema, update to latest run_tests, fix .travis tests
* Remove WA for LP:1448924 since fix already released.
Closes-Bug: PROD-19627
Change-Id: Ib8c380f178f7efd07c50557d0a81009f63992671
diff --git a/.kitchen.yml b/.kitchen.yml
index 44a40e0..6b9a180 100644
--- a/.kitchen.yml
+++ b/.kitchen.yml
@@ -29,7 +29,7 @@
platforms:
- name: <%=ENV['PLATFORM'] || 'ubuntu-xenial'%>
driver_config:
- image: <%=ENV['PLATFORM'] || 'trevorj/salty-whales:xenial'%>
+ image: <%=ENV['PLATFORM'] || 'epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3'%>
platform: ubuntu
verifier:
@@ -38,7 +38,7 @@
suites:
- - name: server_single
+ - name: server
provisioner:
pillars-from-files:
watchdog.sls: tests/pillar/server.sls
diff --git a/.travis.yml b/.travis.yml
index a17ce65..c506297 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -22,10 +22,15 @@
- bundle install
env:
- - PLATFORM=trevorj/salty-whales:xenial-2017.7 SUITE=server-single
- - PLATFORM=trevorj/salty-whales:trusty-2017.7 SUITE=server-single
- - PLATFORM=trevorj/salty-whales:xenial SUITE=server-single
- - PLATFORM=trevorj/salty-whales:trusty SUITE=server-single
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=server_kernel_module
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=server
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=server_kernel_module
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=server
+#
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=server_kernel_module
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2016.3 SUITE=server
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=server_kernel_module
+ - PLATFORM=epcim/salt-formulas:saltstack-ubuntu-xenial-salt-2017.7 SUITE=server
before_script:
- set -o pipefail
diff --git a/README.rst b/README.rst
index c97b975..f821b3d 100644
--- a/README.rst
+++ b/README.rst
@@ -15,7 +15,7 @@
watchdog hardware will cause the reset. In the case of the software watchdog the
ability to reboot will depend on the state of the machines and interrupts.
-This formula installs and configure watchdog daemon...
+This formula installs and configures the watchdog daemon
Sample Pillars
==============
@@ -34,20 +34,53 @@
timeout: 60
device: /dev/watchdog
- # Salt Stack will automatically detect the necessary kernel module which needs to be loaded (ex. hpwdt, iTCO_wdt).
- # If the hardware model is not predefined in map.jinja the default watchdog driver is used: softdog
- # You may specify the kernel parameters if needed:
+
+Sample Pillars with kernel module
+=================================
+
+Salt Stack will automatically detect the necessary kernel module which needs to be loaded (ex. hpwdt, iTCO_wdt).
+If the hardware model is not predefined in map.jinja the default watchdog driver is used: softdog
+You may specify the kernel parameters if needed:
+
+.. code-block:: yaml
+
+ watchdog:
+ server:
+ admin: root
+ enabled: true
+ interval: 1
+ log_dir: /var/log/watchdog
+ realtime: yes
+ timeout: 60
+ device: /dev/watchdog
+ module: softdog
+ ......
+ ......
+ linux:
+ system:
kernel:
- parameter:
- soft_panic: 1
- parameter: value
- parameter_only_without_value: none
+ module:
+ softdog:
+ option:
+ soft_panic: 1
+
+INFO: the extra formula `salt-formula-linux <https://github.com/salt-formulas/salt-formula-linux>`_ is required.
+
+In that case, the apply command should also include the linux state. For example:
+
+
+.. code-block:: bash
+
+ salt "kvm0*" -l debug state.apply watchdog.server,linux.system.kernel -l debug
More Information
================
https://github.com/torvalds/linux/blob/master/Documentation/watchdog/watchdog-api.txt
+This formula also supports a json-schema definition with all options.
+Please refer to "watchdog/schemas/\*.yaml" for more information.
+
Documentation and Bugs
diff --git a/metadata/service/server/init.yml b/metadata/service/server/init.yml
index 08b899e..7fcaf7e 100644
--- a/metadata/service/server/init.yml
+++ b/metadata/service/server/init.yml
@@ -6,3 +6,5 @@
watchdog:
server:
enabled: true
+ # Enable correct restart of service
+ run_wd_keepalive: 0
diff --git a/tests/pillar/server.sls b/tests/pillar/server.sls
index 62b28c7..9cdce64 100644
--- a/tests/pillar/server.sls
+++ b/tests/pillar/server.sls
@@ -1,8 +1,10 @@
watchdog:
server:
enabled: true
- timeout: 60
- # These parametrs should create file in /etc/modprobe.d/kernel_module.conf with content "option module soft_panic=1 parameter=second value_only"
- kernel:
- parameter:
- nowayout: 0
+ timeout: 360
+ ping:
+ 1: 127.0.0.1
+ 2: 0.0.0.0
+ ping_nic:
+ 1: lo
+ max_load_1: 100
diff --git a/tests/pillar/server_kernel_module.sls b/tests/pillar/server_kernel_module.sls
index e7ff6cf..576a03d 100644
--- a/tests/pillar/server_kernel_module.sls
+++ b/tests/pillar/server_kernel_module.sls
@@ -1,10 +1,12 @@
watchdog:
server:
enabled: true
- timeout: 60
+ timeout: 360
+ module: softdog
+linux:
+ system:
kernel:
- parameter:
- soft_panic: 1
- parameter: second
- value_only: none
- module:
+ module:
+ softdog:
+ option:
+ soft_panic: 1
diff --git a/tests/run_tests.sh b/tests/run_tests.sh
index d2d1221..a348912 100755
--- a/tests/run_tests.sh
+++ b/tests/run_tests.sh
@@ -28,6 +28,8 @@
SALT_OPTS="${SALT_OPTS} --retcode-passthrough --local -c ${SALT_CONFIG_DIR} --log-file=/dev/null"
+IGNORE_MODELVALIDATE_MASK=${IGNORE_MODELVALIDATE_MASK:-"novalidate"}
+
if [ "x${SALT_VERSION}" != "x" ]; then
PIP_SALT_VERSION="==${SALT_VERSION}"
fi
@@ -47,9 +49,8 @@
virtualenv $VENV_DIR
source ${VENV_DIR}/bin/activate
python -m pip install salt${PIP_SALT_VERSION}
- python -m pip install jsonschema
- if [[ -f ${CURDIR}/pip_requirements.txt ]]; then
- python -m pip install -r ${CURDIR}/pip_requirements.txt
+ if [[ -f ${CURDIR}/test-requirements.txt ]]; then
+ python -m pip install -r ${CURDIR}/test-requirements.txt
fi
}
@@ -155,19 +156,24 @@
}
prepare() {
- [ -d ${BUILDDIR} ] && mkdir -p ${BUILDDIR}
+ if [[ -f ${BUILDDIR}/.prepare_done ]]; then
+ log_info "${BUILDDIR}/.prepare_done exist, not rebuilding BUILDDIR"
+ return
+ fi
+ [[ -d ${BUILDDIR} ]] && mkdir -p ${BUILDDIR}
[[ ! -f "${VENV_DIR}/bin/activate" ]] && setup_virtualenv
setup_mock_bin
setup_pillar
setup_salt
install_dependencies
+ link_modules
+ touch ${BUILDDIR}/.prepare_done
}
lint_releasenotes() {
[[ ! -f "${VENV_DIR}/bin/activate" ]] && setup_virtualenv
source ${VENV_DIR}/bin/activate
- python -m pip install reno
reno lint ${CURDIR}/../
}
@@ -204,22 +210,30 @@
}
run_model_validate(){
- if [ -d ${SCHEMARDIR} ]; then
- # model validator require py modules
- fetch_dependency "salt:https://github.com/salt-formulas/salt-formula-salt"
- link_modules
- # Rendered Example:
- # python $(which salt-call) --local -c /test1/maas/tests/build/salt --id=maas_cluster modelschema.model_validate maas cluster
- for role in ${SCHEMARDIR}/*.yaml; do
- state_name=$(basename "${role%*.yaml}")
- minion_id="${state_name}"
- # in case debug-reruns, usefull to make cleanup
- [ -n "$DEBUG" ] && { salt_run saltutil.clear_cache; salt_run saltutil.refresh_pillar; salt_run saltutil.sync_all; }
- salt_run -m ${DEPSDIR}/salt-formula-salt --id=${minion_id} modelschema.model_validate ${FORMULA_NAME} ${state_name} || { log_err "Execution of ${FORMULA_NAME}.${state_name} failed"; exit 1 ; }
+ # Run modelschema.model_validate validation.
+ # The test is iterable: run for `each formula ROLE against each ROLE_PILLARNAME`
+ # Pillars should be named by convention ROLE_XXX.sls or ROLE.sls
+ # Example:
+ # client.sls client_auth.sls server.sls server_auth.sls
+ if [ -d ${SCHEMARDIR} ]; then
+ # model validator require py modules
+ fetch_dependency "salt:https://github.com/salt-formulas/salt-formula-salt"
+ link_modules
+ salt_run saltutil.clear_cache; salt_run saltutil.refresh_pillar; salt_run saltutil.sync_all;
+ for role in ${SCHEMARDIR}/*.yaml; do
+ role_name=$(basename "${role%*.yaml}")
+ for pillar in $(ls pillar/${role_name}*.sls | grep -v ${IGNORE_MODELVALIDATE_MASK} ); do
+ pillar_name=$(basename "${pillar%*.sls}")
+ local _message="FORMULA:${FORMULA_NAME} ROLE:${role_name} against PILLAR:${pillar_name}"
+ log_info "model_validate ${_message}"
+ # Rendered Example:
+ # python $(which salt-call) --local -c /test1/maas/tests/build/salt --id=maas_cluster modelschema.model_validate maas cluster
+ salt_run -m ${DEPSDIR}/salt-formula-salt --id=${pillar_name} modelschema.model_validate ${FORMULA_NAME} ${role_name} || { log_err "Execution of model_validate ${_message} failed"; exit 1 ; }
done
- else
- log_err "${SCHEMARDIR} not found!";
- fi
+ done
+ else
+ log_info "${SCHEMARDIR} not found!";
+ fi
}
dependency_check() {
diff --git a/tests/test-requirements.txt b/tests/test-requirements.txt
new file mode 100644
index 0000000..a0f561a
--- /dev/null
+++ b/tests/test-requirements.txt
@@ -0,0 +1,2 @@
+jsonschema
+reno
diff --git a/watchdog/files/default_watchdog b/watchdog/files/default_watchdog
new file mode 100644
index 0000000..945aa67
--- /dev/null
+++ b/watchdog/files/default_watchdog
@@ -0,0 +1,11 @@
+{%- from "watchdog/map.jinja" import server with context -%}
+run_watchdog=1
+# Start wd_keepalive after stopping watchdog? 0 or 1
+run_wd_keepalive={{ server.run_wd_keepalive|default(1) }}
+# Load module before starting watchdog
+watchdog_module={{ server.module }}
+# Specify additional watchdog options here (see manpage).
+
+{#-
+vim: syntax=jinja
+-#}
diff --git a/watchdog/files/kernel_module.conf b/watchdog/files/kernel_module.conf
deleted file mode 100644
index 05642c2..0000000
--- a/watchdog/files/kernel_module.conf
+++ /dev/null
@@ -1,2 +0,0 @@
-{%- from "watchdog/map.jinja" import server with context -%}
-option {{ server.module }}{% for (parameter, value) in server.kernel.parameter.items() %} {{ parameter }}{%if value %}={{ value }}{% endif %}{% endfor %}
diff --git a/watchdog/files/watchdog.conf b/watchdog/files/watchdog.conf
index 98b4238..25e570f 100644
--- a/watchdog/files/watchdog.conf
+++ b/watchdog/files/watchdog.conf
@@ -1,7 +1,19 @@
{%- from "watchdog/map.jinja" import server with context %}
#ping = 172.31.14.1
#ping = 172.26.1.255
+{%- if server.ping is defined %}
+ {%- for _, ip in server.ping.iteritems() %}
+ping = {{ ip }}
+ {%- endfor %}
+{%- endif %}
+
#interface = eth0
+{% if server.ping_nic is defined %}
+ {%- for _, nic in server.ping_nic.iteritems() %}
+interface = {{ nic }}
+ {%- endfor %}
+{% endif %}
+
#file = /var/log/messages
#change = 1407
@@ -9,8 +21,19 @@
# These values will hopefully never reboot your machine during normal use
# (if your machine is really hung, the loadavg will go much higher than 25)
#max-load-1 = 24
+{%- if server.max_load_1 is defined %}
+max-load-1 = {{ server.max_load_1 }}
+{%- endif %}
+
#max-load-5 = 18
+{%- if server.max_load_5 is defined %}
+max-load-5 = {{ server.max_load_5 }}
+{%- endif %}
+
#max-load-15 = 12
+{%- if server.max_load_15 is defined %}
+max-load-15 = {{ server.max_load_15 }}
+{%- endif %}
# Note that this is the number of pages!
# To get the real size, check how large the pagesize is on your machine.
@@ -23,6 +46,9 @@
#test-timeout =
#watchdog-device = /dev/watchdog
+{%- if server.device is defined %}
+watchdog-device = {{ server.device }}
+{%- endif %}
# Defaults compiled into the binary
#temperature-device =
@@ -30,39 +56,38 @@
# Defaults compiled into the binary
#admin = root
+{%- if server.admin is defined %}
+admin = {{ server.admin }}
+{%- endif %}
+
#interval = 1
+{%- if server.interval is defined %}
+interval = {{ server.interval }}
+{%- endif %}
+
#logtick = 1
+
#log-dir = /var/log/watchdog
+{%- if server.log_dir is defined %}
+log-dir = {{ server.log_dir }}
+{%- endif %}
# This greatly decreases the chance that watchdog won't be scheduled before
# your machine is really loaded
-realtime = yes
+#realtime = yes
+realtime = {{ server.realtime|default('yes') }}
+
priority = 1
# Check if rsyslogd is still running by enabling the following line
#pidfile = /var/run/rsyslogd.pid
-
-{%- if server.admin is defined %}
-admin = {{ server.admin }}
-{%- endif %}
-
-{%- if server.interval is defined %}
-interval = {{ server.interval }}
-{%- endif %}
-
-{%- if server.log_dir is defined %}
-log-dir = {{ server.log_dir }}
-{%- endif %}
-
-{%- if server.realtime is defined %}
-realtime = {{ server.realtime }}
-{%- endif %}
-
+# Set the watchdog device timeout during startup. If not set, the
+# default is driver-dependent.
{%- if server.timeout is defined %}
watchdog-timeout = {{ server.timeout }}
{%- endif %}
-{%- if server.device is defined %}
-watchdog-device = {{ server.device }}
-{%- endif %}
+{#-
+vim: syntax=jinja
+-#}
diff --git a/watchdog/map.jinja b/watchdog/map.jinja
index 1699eb1..cae5c5e 100644
--- a/watchdog/map.jinja
+++ b/watchdog/map.jinja
@@ -1,20 +1,11 @@
{% set server = salt['grains.filter_by']({
'HP': {
'module': 'hpwdt',
- 'kernel': {
- 'parameter': {}
- }
},
'DELL': {
'module': 'iTCO_wdt',
- 'kernel': {
- 'parameter': {}
- }
},
'default': {
'module': 'softdog',
- 'kernel': {
- 'parameter': {}
- }
},
}, grain='manufacturer', merge=salt['pillar.get']('watchdog:server')) %}
diff --git a/watchdog/schemas/server.yaml b/watchdog/schemas/server.yaml
index 4bcd02d..51c1881 100644
--- a/watchdog/schemas/server.yaml
+++ b/watchdog/schemas/server.yaml
@@ -11,32 +11,24 @@
- enabled
properties:
+ enabled:
+ description: |
+ Enables Watchdog daemon service
+ type: boolean
admin:
description: |
Email address to send admin mail to
type: string
example: root
- enabled:
- description: |
- Enables Watchdog daemon service
- type: boolean
interval:
description: |
Set the interval between two writes to the watchdog device
type: integer
minimum: 1
- kernel:
+ module:
description: |
- Parameters for the kernel module which are used for loading the kernel module
+ Explicit name of required kernel module. By default "softdog" will be used.
type: string
- type: object
- parameters:
- parameter:
- description: |
- Name of the watchdog kernel module
- type: string
- items:
- $ref: "#/definitions/allow_kdump"
log_dir:
description: |
Set the log directory to capture the standard output and standard error from repair-binary and test-binary execution
@@ -57,58 +49,45 @@
Set the watchdog device name
type: string
example: /dev/watchdog
-
-definitions:
- # hpwdt
- allow_kdump:
+ run_wd_keepalive:
description: |
- Start a kernel dump after NMI occurs (int)
- type: integer
- minimum: 0
-
- # iTCO_wdt
- heartbeat:
+ Start wd_keepalive after stopping watchdog?Option required for correct daemon stop.
+ type: boolean
+ watchdog-timeout:
description: |
- Watchdog timeout in seconds. 5..76 (TCO v1) or 3..614 (TCO v2), default=30) (int)
+ Set the watchdog device timeout during startup. If not set, the
+ default is driver-dependent.
type: integer
- minimum: 3
- maximum: 614
-
- # iTCO_wdt, hpwdt, softdog
- nowayout:
+ max_load_1:
description: |
- Watchdog cannot be stopped once started (default=0) (bool)
- type: integer
- minimum: 0
- maximum: 1
-
- # hpwdt, softdog
- soft_margin:
+ Set the maximal allowed load average for a 1 minute span. Once
+ this load average is reached the system is rebooted. Default
+ value is 0. That means the load average check is disabled. Be
+ careful not to this parameter too low. To set a value less then
+ the predefined minimal value of 2, you have to use the -f
+ commandline option.
+ type: number
+ max_load_5:
description: |
- Watchdog soft_margin in seconds. (0 < soft_margin < 65536, default=60) (uint)
- type: integer
- minimum: 0
-
- # softdog
- soft_noboot:
+ Set the maximal allowed load average for a 5 minute span. Once
+ this load average is reached the system is rebooted. Default
+ value is 3/4*max-load-1. Be careful not to this parameter too
+ low. To set a value less then the predefined minimal value of 2,
+ you have to use the -f commandline option.
+ type: number
+ max_load_15:
description: |
- Softdog action, set to 1 to ignore reboots, 0 to reboot (default=0) (int)
- type: integer
- minimum: 0
- maximum: 1
-
- # softdog
- soft_panic:
+ Set the maximal allowed load average for a 15 minute span. Once
+ this load average is reached the system is rebooted. Default
+ value is 1/2*max-load-1. Be careful not to this parameter too
+ low. To set a value less then the predefined minimal value of 2,
+ you have to use the -f commandline option.
+ type: number
+ ping:
description: |
- Softdog action, set to 1 to panic, 0 to reboot (default=0) (int)
- type: integer
- minimum: 0
- maximum: 1
-
- # iTCO_wdt
- turn_SMI_watchdog_clear_off:
+ List of IP address for ping mode.
+ type: object
+ ping_nic:
description: |
- Turn off SMI clearing watchdog (depends on TCO-version)(default=1) (int)
- type: integer
- minimum: 0
- maximum: 1
+ Set interface name for network mode.
+ type: object
diff --git a/watchdog/server.sls b/watchdog/server.sls
index 5af0d55..b6d96a2 100644
--- a/watchdog/server.sls
+++ b/watchdog/server.sls
@@ -5,47 +5,15 @@
pkg.installed:
- name: watchdog
-#Fix bug https://bugs.launchpad.net/ubuntu/+source/watchdog/+bug/1448924 in Ubuntu Xenial (workaround)
-{% if grains['oscodename'] == 'xenial' %}
-/lib/systemd/system/watchdog.service:
- file.copy:
- - name: /etc/systemd/system/watchdog.service
- - source: /lib/systemd/system/watchdog.service
- - require:
- - pkg: watchdog_packages
- - require_in: /etc/systemd/system/watchdog.service
-
-/etc/systemd/system/watchdog.service:
- file.line:
- - name: /etc/systemd/system/watchdog.service
- - mode: ensure
- - after: \[Install\]
- - content: WantedBy=default.target
- - require:
- - file: /lib/systemd/system/watchdog.service
- - require_in: watchdog_service
-{% endif %}
-
/etc/default/watchdog:
- file.replace:
- - name: /etc/default/watchdog
- - pattern: ^watchdog_module=.*
- - repl: watchdog_module="{{ server.module }}"
- - require:
- - pkg: watchdog_packages
- - require_in: watchdog_service
-
-{%- if server.kernel.parameter is defined and server.kernel.parameter %}
-/etc/modprobe.d/kernel_module.conf:
file.managed:
- - name: /etc/modprobe.d/{{ server.module }}.conf
+ - name: /etc/default/watchdog
- template: jinja
- - source: salt://watchdog/files/kernel_module.conf
+ - source: salt://watchdog/files/default_watchdog
- makedirs: True
- require:
- pkg: watchdog_packages
- require_in: watchdog_service
-{%- endif %}
/etc/watchdog.conf:
file.managed:
@@ -61,5 +29,6 @@
- name: watchdog
- watch:
- file: /etc/watchdog.conf
+ - file: /etc/default/watchdog
{%- endif %}