| {%- from "linux/map.jinja" import system with context %} |
| {%- set mcelog = system.mcelog %} |
| # |
| # Example config file for mcelog |
| # mcelog is the user space backend that decodes and processes machine check events |
| # (cpu hardware errors) reported by the CPU to the kernel |
| # |
| |
| # general format |
| #optionname = value |
| # white space is not allowed in value currently, except at the end where it is dropped |
| # |
| |
| # In general all command line options that are not commands work here. |
| # See man mcelog or mcelog --help for a list. |
| # e.g. to enable the --no-syslog option use |
| #no-syslog = yes (or no to disable) |
| # when the option has an argument |
| #logfile = /tmp/logfile |
| # below are the options which are not command line options. |
| |
| # Set CPU type for which mcelog decodes events: |
| #cpu = type |
| # For valid values for type please see mcelog --help. |
| # If this value is set incorrectly the decoded output will be likely incorrect. |
| # By default, when this parameter is not set, mcelog uses the CPU it is running on. |
| # On very new kernels the mcelog events reported by the kernel also carry |
| # the CPU type, which is used too when available and not overridden. |
| |
| # Enable daemon mode: |
| #daemon = yes |
| # By default mcelog just processes the currently pending events and exits. |
| # In daemon mode it will keep running as a daemon in the background and poll |
| # the kernel for events and then decode them. |
| |
| # Filter out known broken events by default. |
| filter = yes |
| # Don't log memory errors individually. |
| # They are still accounted if error accounting is enabled. |
| #filter-memory-errors = yes |
| |
| # output in undecoded raw format to be easier machine readable |
| # (default is decoded). |
| #raw = yes |
| |
| # Set CPU MHz to decode uptime from the time stamp counter (output is |
| # unreliable; not needed on new kernels, which report the event time |
| # directly). A lot of systems don't have a linear time stamp clock |
| # and the output is wrong then. |
| # Normally mcelog tries to figure out if the TSC is reliable |
| # and only uses the current frequency then. |
| # Setting a frequency forces timestamp decoding. |
| # This setting is obsolete with modern kernels which report the time |
| # directly. |
| #cpumhz = 1800.00 |
| |
| # log output options |
| # Log decoded machine checks in syslog (default stdout or syslog for daemon) |
| #syslog = yes |
| # Log decoded machine checks in syslog with error level |
| #syslog-error = yes |
| # Never log anything to syslog |
| #no-syslog = yes |
| # Append log output to logfile instead of stdout. Only when no syslog logging is active |
| #logfile = filename |
| |
| {#- Render the log output options from mcelog.logging (mcelog = system.mcelog). #} |
| {#- Each option is emitted only when its key is defined: syslog, syslog_error #} |
| {#- and no_syslog are written as yes/no, logfile is written verbatim. #} |
| {#- The leading "-" in each tag strips the preceding whitespace, so options #} |
| {#- that are skipped do not leave blank lines in the rendered file. #} |
| {%- if mcelog.logging is defined %} |
| |
| {%- if mcelog.logging.syslog is defined %} |
| syslog = {{ 'yes' if mcelog.logging.syslog else 'no' }} |
| {%- endif %} |
| {%- if mcelog.logging.syslog_error is defined %} |
| syslog-error = {{ 'yes' if mcelog.logging.syslog_error else 'no' }} |
| {%- endif %} |
| {%- if mcelog.logging.no_syslog is defined %} |
| no-syslog = {{ 'yes' if mcelog.logging.no_syslog else 'no' }} |
| {%- endif %} |
| {%- if mcelog.logging.logfile is defined %} |
| logfile = {{ mcelog.logging.logfile }} |
| {%- endif %} |
| |
| {%- endif %} |
| # Use SMBIOS information to decode DIMMs (needs root). |
| # This function is not recommended to use right now and generally not needed. |
| # The exception is memdb prepopulation, which is configured separately below. |
| #dmi = no |
| |
| # When in daemon mode run as this user after set up. |
| # Note that the triggers will run as this user too. |
| # Setting this to non root will mean that triggers cannot take some corrective |
| # action, like offlining objects. |
| #run-credentials-user = root |
| |
| # group to run as daemon with |
| # default to the group of the run-credentials-user |
| #run-credentials-group = nobody |
| |
| [server] |
| # User allowed to access the client socket. |
| # When set to *, any user matches. |
| # root is always allowed to access. |
| # default: root only |
| client-user = root |
| # Group allowed to access mcelog. |
| # When no group is configured any group matches (the user check still applies). |
| # When set to *, any group matches. |
| #client-group = root |
| # Path to the unix socket for client<->server communication. |
| # When no socket-path is configured the server will not start. |
| #socket-path = /var/run/mcelog-client |
| # When mcelog starts it checks if a server is already running. This configures the timeout |
| # for this check. |
| #initial-ping-timeout = 2 |
| # |
| [dimm] |
| # Is the in-memory DIMM error tracking enabled? |
| # Only works on systems with an integrated memory controller and |
| # which are supported. |
| # Only takes effect in daemon mode. |
| dimm-tracking-enabled = yes |
| # Use DMI information from the BIOS to prepopulate the DIMM database. |
| # Note this might not work with every BIOS and requires mcelog to run as root. |
| # The alternative is to let mcelog create DIMM objects on demand. |
| dmi-prepopulate = yes |
| # |
| # Execute these triggers when the rate of corrected or uncorrected |
| # errors per DIMM exceeds the threshold. |
| # Note that when the hardware does not report DIMMs this might also |
| # be per channel. |
| # The default of 10/24h is reasonable for server quality |
| # DDR3 DIMMs as of 2009/10. |
| #uc-error-trigger = dimm-error-trigger |
| uc-error-threshold = 1 / 24h |
| #ce-error-trigger = dimm-error-trigger |
| ce-error-threshold = 10 / 24h |
| |
| [socket] |
| # Enable memory error accounting per socket. |
| socket-tracking-enabled = yes |
| |
| # Threshold and trigger for uncorrected memory errors on a socket. |
| # mem-uc-error-trigger = socket-memory-error-trigger |
| |
| mem-uc-error-threshold = 100 / 24h |
| |
| # Trigger script for corrected memory errors on a socket. |
| mem-ce-error-trigger = socket-memory-error-trigger |
| |
| # Threshold on when to trigger a corrected error for the socket. |
| |
| mem-ce-error-threshold = 100 / 24h |
| |
| # Log socket error threshold explicitly? |
| mem-ce-error-log = yes |
| |
| # Trigger script for uncorrected bus error events |
| bus-uc-threshold-trigger = bus-error-trigger |
| |
| # Trigger script for uncorrected IOMCA errors |
| iomca-threshold-trigger = iomca-error-trigger |
| |
| # Trigger script for other uncategorized errors |
| unknown-threshold-trigger = unknown-error-trigger |
| |
| [cache] |
| # Processing of cache error thresholds reported by Intel CPUs. |
| cache-threshold-trigger = cache-error-trigger |
| |
| # Should cache threshold events be logged explicitly? |
| cache-threshold-log = yes |
| |
| [page] |
| # Memory error accounting per 4K memory page. |
| # Threshold for the corrected memory errors trigger script. |
| memory-ce-threshold = 10 / 24h |
| |
| # Trigger script for corrected errors. |
| # memory-ce-trigger = page-error-trigger |
| |
| # Should page threshold events be logged explicitly? |
| memory-ce-log = yes |
| |
| # Specify the internal action mcelog takes when a page error threshold is exceeded. |
| # This is done in addition to executing the trigger script if available. |
| # off no action |
| # account only account errors |
| # soft try to soft-offline page without killing any processes |
| # This requires an up-to-date kernel. Might not be successful. |
| # hard try to hard-offline page by killing processes |
| # Requires an up-to-date kernel. Might not be successful. |
| # soft-then-hard First try to soft offline, then try hard offlining |
| #memory-ce-action = off|account|soft|hard|soft-then-hard |
| memory-ce-action = soft |
| |
| [trigger] |
| # Maximum number of triggers running at the same time. |
| children-max = 2 |
| # Execute triggers in this directory. |
| directory = /etc/mcelog |