| Alex | e0c5b9e | 2019-04-23 18:51:23 -0500 | [diff] [blame] | 1 | import subprocess | 
|  | 2 | import socket | 
|  | 3 | import salt.utils | 
|  | 4 | import logging | 
|  | 5 | import os | 
|  | 6 | import re | 
|  | 7 | import json | 
|  | 8 |  | 
# Module metadata.
__author__ = "Dzmitry Stremkouski"
__copyright__ = "Copyright 2019, Mirantis Inc."
__license__ = "Apache 2.0"

# Module-level logger with its own stream handler.
logger = logging.getLogger(__name__)
stream = logging.StreamHandler()
# logging.getLogger() hands back the same logger object for a given name, so
# attaching a handler on every (re)load of this module would stack handlers
# and duplicate each message. Attach only when the logger has none yet.
if not logger.handlers:
    logger.addHandler(stream)
|  | 16 |  | 
|  | 17 |  | 
|  | 18 | def _failed_minions(out, agent, failed_minions): | 
|  | 19 |  | 
|  | 20 | ''' Verify failed minions ''' | 
|  | 21 |  | 
|  | 22 | if len(failed_minions) > 0: | 
|  | 23 | logger.error("%s check FAILED" % agent) | 
|  | 24 | logger.error("Some minions returned non-zero exit code or empty data") | 
|  | 25 | logger.error("Failed minions:" + str(failed_minions)) | 
|  | 26 | for minion in failed_minions: | 
|  | 27 | logger.error(minion) | 
|  | 28 | logger.debug(str(out[minion]['ret'])) | 
|  | 29 | __context__['retcode'] = 2 | 
|  | 30 | return False | 
|  | 31 |  | 
|  | 32 | return True | 
|  | 33 |  | 
|  | 34 |  | 
def _minions_output(out, agent, ignore_dead, ignore_empty=False):

    ''' Verify minions output and exit code

    :param out: dict returned by saltutil.cmd, keyed by minion id; each value
                is read for its 'jid', 'retcode' and 'ret' entries
    :param agent: check name used as a prefix of the error messages
    :param ignore_dead: when False, the job is looked up through the
                        jobs.print_job runner and the check fails if fewer
                        minions returned than the job lists
    :param ignore_empty: when True, a zero retcode is sufficient and the
                         returned data itself is not inspected
    :return: True when every minion passed, False otherwise (retcode 2 is
             stored in __context__ on every failure path)
    '''

    if not out:
        logger.error("%s check FAILED" % agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # dict.itervalues() exists on Python 2 only; next(iter(...)) works on
        # both Python 2 and Python 3.
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']('jobs.print_job', arg=[jid]) or None
        if not job_stats:
            logger.error("%s check FAILED" % agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED" % agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        data = out[minion]

        # A missing or non-zero retcode is always a failure.
        if 'retcode' not in data or data['retcode'] != 0:
            failed_minions.append(minion)
            continue

        if ignore_empty:
            continue

        ret = data.get('ret')
        if ret is None or isinstance(ret, bool):
            # A bare boolean (or nothing at all) carries no data to verify.
            failed_minions.append(minion)
        elif hasattr(ret, '__len__') and len(ret) == 0:
            # Only sized values can be empty; unsized ones (e.g. numbers)
            # would make len() raise TypeError.
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    return True
|  | 86 |  | 
|  | 87 |  | 
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):

    ''' Verify minions are online

    Runs test.ping on the targeted minions through saltutil.cmd and validates
    the answer with _minions_output; returned data is not checked for
    emptiness (ignore_empty=True), only the exit codes are.

    :param wait_timeout: passed to saltutil.cmd as timeout
    :param gather_job_wait_timeout: passed to saltutil.cmd as gather_job_timeout
    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :return: True when the check passed, False otherwise
    '''

    ping_request = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'test.ping',
        'timeout': wait_timeout,
        'gather_job_timeout': gather_job_wait_timeout,
    }
    out = __salt__['saltutil.cmd'](**ping_request) or None

    return _minions_output(out, "Minions", ignore_dead, ignore_empty=True)
|  | 101 |  | 
|  | 102 |  | 
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify time diff on servers

    Collects status.time (called with the '%s' format) from the targeted
    minions and compares the largest and the smallest reported value.

    :param time_diff: maximum allowed difference between the two extremes
    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, a failed check returns the tuple
                   (False, {time: [minions]}) and a passed check logs the
                   verified minions
    :return: True when the spread is within time_diff, False otherwise
    '''

    agent = "Time diff"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.time',
        arg=['%s'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    debug = kwargs.get("debug", False)
    minions_times = {}
    env_times = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        if out[minion]['retcode'] != 0:
            continue
        stamp = int(out[minion]['ret'])
        # Group minions by the timestamp they reported.
        minions_times.setdefault(str(stamp), []).append(minion)
        env_times.append(stamp)

    ordered = sorted(env_times)
    spread = ordered[-1] - ordered[0]

    if spread > time_diff:
        __context__['retcode'] = 2
        if debug:
            return False, minions_times
        return False

    if debug:
        logger.info(verified_minions)
    return True
|  | 145 |  | 
|  | 146 |  | 
def contrail_check(target='I@contrail:control or I@contrail:collector or I@opencontrail:compute or I@opencontrail:client', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify contrail status returns nothing critical

    Runs 'contrail-status' on the targeted minions through cmd.run. A minion
    fails when any output line is neither empty, nor starts with '==', nor
    has the form '<name> <state>' with a state beginning with 'active',
    'backup' or 'inactive (disabled on boot)'.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the validated minions
    :return: True when every minion passed, False otherwise
    '''

    agent = "Contrail status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='cmd.run',
        arg=['contrail-status'],
        timeout=5
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # Raw literal: the pattern is unchanged, but \S, \s and \( are no longer
    # invalid escape sequences of a regular string literal.
    pattern = r'^(==|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))'
    prog = re.compile(pattern)

    failed_minions = []
    validated = []
    for minion in out:
        status_lines = out[minion]['ret'].split('\n')
        if not all(prog.match(line) for line in status_lines):
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
    return True
|  | 181 |  | 
|  | 182 |  | 
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify galera cluster size and state

    Reads mysql.status from the targeted minions. A minion fails when its
    'wsrep_cluster_size' differs from cluster_size or its 'wsrep_evs_state'
    is not 'OPERATIONAL'.

    :param cluster_size: expected number of cluster members
    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the validated minions plus the
                   size and state reported by the first of them
    :return: True when every minion passed, False otherwise
    '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='mysql.status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        status = out[minion]['ret']
        size_mismatch = int(status['wsrep_cluster_size']) != int(cluster_size)
        not_operational = status['wsrep_evs_state'] != 'OPERATIONAL'
        if (size_mismatch or not_operational) and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        first_status = out[validated[0]]['ret']
        logger.info(validated)
        logger.info("Cluster size: " + str(first_status['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(first_status['wsrep_evs_state']))
    return True
|  | 217 |  | 
|  | 218 |  | 
|  | 219 | def _quote_str(s, l=False, r=False): | 
|  | 220 |  | 
|  | 221 | ''' Quting rabbitmq erl objects for json import ''' | 
|  | 222 |  | 
|  | 223 | if len(s) > 0: | 
|  | 224 | if l: | 
|  | 225 | s = s.lstrip() | 
|  | 226 | if r: | 
|  | 227 | s = s.rstrip() | 
|  | 228 | if (s[0] == "'") and (s[-1] != "'") and r and not l: | 
|  | 229 | s += "'" | 
|  | 230 | if (s[0] == '"') and (s[-1] != '"') and r and not l: | 
|  | 231 | s += '"' | 
|  | 232 | if (s[-1] == "'") and (s[0] != "'") and l and not r: | 
|  | 233 | s = "'" + s | 
|  | 234 | if (s[-1] == '"') and (s[0] != '"') and l and not r: | 
|  | 235 | s = '"' + s | 
|  | 236 | if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'): | 
|  | 237 | s = '"' + s.replace('"', '\\\"') + '"' | 
|  | 238 | else: | 
|  | 239 | if (not l) and (not r) and s[0] != '"' and not s[-1] != '"': | 
|  | 240 | s= s.replace('"', '\\\"') | 
|  | 241 | return s.replace("'", '"') | 
|  | 242 | else: | 
|  | 243 | return s | 
|  | 244 |  | 
|  | 245 |  | 
def _sanitize_rmqctl_output(string):

    ''' Sanitizing rabbitmq erl objects for json import

    Splits the text on commas, separates each piece into a bracket/whitespace
    prefix, a token and a bracket/whitespace suffix, quotes the token with
    _quote_str and joins the pieces back. A piece that directly follows an
    opening curly brace and has no suffix is treated as a key and followed
    by ':' instead of ','.

    :param string: rabbitmqctl output cut down to its bracketed part
    :return: text intended to be loadable by json.loads
    '''

    pieces = []
    for line in string.split(','):
        copy = line
        left = ""
        right = ""
        mid = copy
        lpar = False
        rpar = False
        if re.search(r'([\[\{\s]+)(.*)', copy):
            mid = re.sub(r'^([\[\{\s]+)', '', copy)
            # Slice by prefix length. copy[:-len(mid)] would collapse to ''
            # when mid is empty and silently drop the brackets.
            left = copy[:len(copy) - len(mid)]
            copy = mid
            lpar = True
        if re.search(r'(.*)([\]\}\s]+)$', copy):
            mid = re.sub(r'([\]\}\s]+)$', '', copy)
            right = copy[len(mid):]
            copy = mid
            rpar = True

        result = left + _quote_str(mid, l=lpar, r=rpar) + right
        opening = left.strip()
        if (not rpar) and lpar and (len(opening) > 0) and (opening[-1] == '{'):
            result += ":"
        else:
            result += ","
        pieces.append(result)

    # Drop the separator appended after the last piece.
    rabbitctl_json = "".join(pieces)[:-1]

    # Escape values that still contain a double quote inside their quotes.
    new_rabbitctl_json = rabbitctl_json
    for s in re.findall(r'"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in s[1:][:-1]:
            changed = '"' + s.replace('\\', '\\\\').replace('"', '\\\"') + '"'
            new_rabbitctl_json = new_rabbitctl_json.replace(s, changed)
    return new_rabbitctl_json
|  | 283 |  | 
|  | 284 |  | 
def rabbitmq_cmd(cmd):

    ''' JSON formatted RabbitMQ command output

    Runs 'rabbitmqctl <cmd>', keeps the text between the first '[' and the
    last ']' of its stdout, converts it with _sanitize_rmqctl_output and
    parses the result as JSON.

    :param cmd: one of 'status', 'cluster_status', 'list_hashes',
                'list_ciphers'
    :return: the parsed JSON value; False (with retcode 2 stored in
             __context__) when the command is not supported or its output
             contains no bracketed section
    '''

    supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
    if cmd not in supported_commands:
        logger.error("Command is not supported yet, sorry")
        logger.error("Supported commands are: " + str(supported_commands))
        __context__['retcode'] = 2
        return False

    # universal_newlines=True yields text on Python 3 as well; with bytes,
    # stdout.find('[') below would raise TypeError.
    proc = subprocess.Popen(['rabbitmqctl', cmd], stdout=subprocess.PIPE, universal_newlines=True)
    stdout = proc.communicate()[0]

    start = stdout.find('[')
    end = stdout.rfind(']')
    if start == -1 or end < start:
        # find() returns -1 when nothing matched, which would otherwise be
        # used as a slice index and produce garbage.
        logger.error("No parsable output from rabbitmqctl %s" % cmd)
        __context__['retcode'] = 2
        return False

    rabbitmqctl_cutoff = stdout[start:end + 1].replace('\n', '')
    return json.loads(_sanitize_rmqctl_output(rabbitmqctl_cutoff))
|  | 301 |  | 
|  | 302 |  | 
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify rabbit cluster and its alarms

    Reads health_checks.rabbitmq_cmd('cluster_status') from the targeted
    minions. A minion fails when its running nodes differ from its disc
    nodes, or when any running node has a non-empty alarm list.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the running nodes seen on the
                   last processed minion
    :return: True when every minion passed, False otherwise
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.rabbitmq_cmd',
        arg=['cluster_status'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # list.sort() returns None, so comparing two .sort() calls is always
        # True; compare sorted copies to detect nodes that are not running.
        running_nodes = sorted(running_nodes)
        available_nodes = sorted(available_nodes)
        if running_nodes != available_nodes:
            failed_minions.append(minion)
            continue

        nodes_alarms = []
        for node in running_nodes:
            for el in alarms:
                if node in el and len(el[node]) > 0:
                    nodes_alarms.append(el[node])
        if nodes_alarms:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(running_nodes)
    return True
|  | 353 |  | 
|  | 354 |  | 
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):

    ''' JSON formatted haproxy status

    Sends 'show stat' to the haproxy admin socket and converts the CSV answer
    into a nested dict:
    {pxname: {'FRONTEND': {...}, 'BACKEND': {...}, 'UPSTREAM': {svname: {...}}}}

    :param socket_path: path of the haproxy unix socket
    :param buff_size: number of bytes requested per recv() call
    :param encoding: encoding used for the command and the answer
    :param stats_filter: when not empty, only these CSV fields are kept in
                         every element (the list is only read, never modified)
    :return: dict described above; False (with retcode 2 stored in
             __context__) when the socket path does not exist
    '''

    stat_cmd = 'show stat\n'

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    chunks = []
    try:
        client.connect(socket_path)
        # sendall() keeps writing until the whole command is sent.
        client.sendall(bytearray(stat_cmd, encoding))
        output = client.recv(buff_size)
        while output:
            chunks.append(output)
            output = client.recv(buff_size)
    finally:
        # Release the socket even when connect/send/recv raised.
        client.close()

    # Decode once after joining, so a multi-byte character split across two
    # recv() chunks cannot break decoding.
    res = b"".join(chunks).decode(encoding)

    res_list = res.split('\n')
    # The header line starts with '# '; drop these two characters.
    fields = res_list[0][2:].split(',')

    haproxy_stats = {}
    for line in res_list[1:]:
        if len(line.strip()) == 0:
            continue
        element = dict(zip(fields, line.split(',')))
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            element = {key: value for key, value in element.items() if key in stats_filter}

        proxy = haproxy_stats.setdefault(server_name, {})
        if server_type == "FRONTEND" or server_type == "BACKEND":
            proxy[server_type] = element
        else:
            # Any other svname is stored as an upstream of this proxy.
            proxy.setdefault('UPSTREAM', {})[server_type] = element

    return haproxy_stats
|  | 409 |  | 
|  | 410 |  | 
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):

    ''' Verify haproxy backends status

    Reads health_checks.haproxy_status (filtered to the 'status' field) from
    the targeted minions. For every service not listed in ignore_services a
    minion fails when the FRONTEND status is not 'OPEN', the BACKEND status
    is not 'UP', a non-ignored upstream status is not 'UP', or the service
    has no upstreams at all and ignore_no_upstream is False.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param ignore_services: service names to skip (only read, never modified)
    :param ignore_upstreams: upstream names to skip (only read, never modified)
    :param ignore_no_upstream: do not fail services without upstreams
    :param kwargs: 'debug' - when true, log the verified minions
    :return: True when every minion passed, False otherwise
    '''

    agent = "haproxy status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.haproxy_status',
        arg=["stats_filter=['status']"],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        haproxy_json = out[minion]['ret']
        minion_failed = False

        for service in haproxy_json:
            if service in ignore_services:
                continue
            sections = haproxy_json[service]

            # haproxy_status only creates the sections it saw in the stats,
            # so a proxy may lack FRONTEND or BACKEND; an absent section is
            # skipped instead of raising KeyError.
            frontend = sections.get('FRONTEND')
            if frontend is not None and frontend['status'] != 'OPEN':
                minion_failed = True
            backend = sections.get('BACKEND')
            if backend is not None and backend['status'] != 'UP':
                minion_failed = True

            if 'UPSTREAM' in sections:
                upstreams = sections['UPSTREAM']
                for upstream in upstreams:
                    if upstream in ignore_upstreams:
                        continue
                    if upstreams[upstream]['status'] != 'UP':
                        minion_failed = True
            elif not ignore_no_upstream:
                minion_failed = True

        if minion_failed:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
|  | 458 |  | 
|  | 459 |  | 
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):

    ''' Verify storage space/inodes status

    Reads disk.usage ('capacity' field) or disk.inodeusage ('use' field) from
    the targeted minions, drops the last character of the value and compares
    the remaining number with the limit.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param verify: 'space' or 'inodes'
    :param space_limit: limit used when verify is 'space'
    :param inode_limit: limit used when verify is 'inodes'
    :param ignore_dead: skip the comparison against the job's minion list
    :param ignore_partitions: entries of the minion output to skip (only
                              read, never modified)
    :param kwargs: 'debug' - when true, log the verified minions
    :return: True when no checked entry exceeds the limit, False otherwise
             (also for an unsupported 'verify' value)
    '''

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    if verify == 'space':
        fun_cmd, json_arg, limit = 'disk.usage', 'capacity', space_limit
    else:
        fun_cmd, json_arg, limit = 'disk.inodeusage', 'use', inode_limit

    agent = "df status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun=fun_cmd,
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limit = int(limit)
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        df_json = out[minion]['ret']
        for disk in df_json:
            if disk in ignore_partitions:
                continue
            try:
                # The value ends with a one-character suffix; drop it.
                used = int(df_json[disk][json_arg][:-1])
            except (TypeError, ValueError):
                # Not a number (presumably '-' for filesystems that report
                # no usage - TODO confirm); nothing to compare, so skip the
                # entry instead of aborting the whole check.
                continue
            if used > limit and minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
|  | 509 |  | 
|  | 510 |  | 
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):

    ''' Verify load average status

    Reads status.loadavg from the targeted minions; a minion fails when its
    '1-min', '5-min' or '15-min' value is above the matching limit.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param la1: limit for the '1-min' value
    :param la5: limit for the '5-min' value
    :param la15: limit for the '15-min' value
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the verified minions
    :return: True when every minion is within the limits, False otherwise
    '''

    agent = "load average status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.loadavg',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limits = (('1-min', la1), ('5-min', la5), ('15-min', la15))
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        la_json = out[minion]['ret']
        # Build the full list first so all three values are always evaluated.
        exceeded = [float(la_json[key]) > float(limit) for key, limit in limits]
        if any(exceeded) and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
|  | 548 |  | 
|  | 549 |  | 
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):

    ''' Verify netdev rx/tx drop status

    Reads status.netdev from the targeted minions; a minion fails when a
    non-ignored device reports 'rx_drop' or 'tx_drop' above its limit.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param rx_drop_limit: limit for 'rx_drop'
    :param tx_drop_limit: limit for 'tx_drop'
    :param ignore_devices: device names to skip (only read, never modified)
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the verified minions
    :return: True when every minion is within the limits, False otherwise
    '''

    agent = "netdev rx/tx status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.netdev',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        dev_json = out[minion]['ret']
        for netdev in dev_json:
            if netdev in ignore_devices:
                continue
            counters = dev_json[netdev]
            rx_over = int(counters['rx_drop']) > int(rx_drop_limit)
            tx_over = int(counters['tx_drop']) > int(tx_drop_limit)
            if (rx_over or tx_over) and minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
|  | 586 |  | 
|  | 587 |  | 
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):

    ''' Verify available memory status

    Reads status.meminfo from the targeted minions and computes the used
    share as (MemTotal - MemAvailable) * 100 / MemTotal; a minion fails when
    that percentage is above used_limit.

    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param used_limit: maximum allowed used memory, in percent
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the passed minions together
                   with their used percentage
    :return: True when every minion is within the limit, False otherwise
    '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.meminfo',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        meminfo = out[minion]['ret']
        mem_avail = int(meminfo['MemAvailable']['value'])
        mem_total = int(meminfo['MemTotal']['value'])
        # Multiply by a float literal: on Python 2, int * 100 / int truncates
        # the percentage before float() is ever applied.
        used_pct = (mem_total - mem_avail) * 100.0 / mem_total
        if used_pct > float(used_limit):
            if minion not in failed_minions:
                failed_minions.append(minion)
        else:
            verified_minions.append({minion: str(used_pct) + '%'})

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
|  | 622 |  | 
|  | 623 |  | 
def _ntpq_int(value):

    ''' Convert an integer column of the ntpq peers table to int

    '-' is mapped to -1. A value ending with 'm', 'h' or 'd' is converted to
    seconds (assumes ntpq abbreviates long intervals with these unit
    suffixes - see the ntpq documentation).
    '''

    if value == '-':
        return -1
    multipliers = {'m': 60, 'h': 3600, 'd': 86400}
    if value[-1:] in multipliers:
        return int(value[:-1]) * multipliers[value[-1]]
    return int(value)


def ntp_status(params = ['-4', '-p', '-n']):

    ''' JSON formatted ntpq command output

    Runs 'ntpq' with the given parameters and converts the peers table into
    a dict keyed by peer address. Every peer holds the table columns ('st'
    renamed to 'stratum', 't' to 'source_type'), a 'current' flag which is
    True for the peer marked with '*', and a 'state' dict with the tally
    indicator and its description. Peers without a known tally character
    get the indicator 'f'.

    :param params: command line arguments for ntpq (only read, never
                   modified)
    :return: dict of peers; False (with retcode 2 stored in __context__)
             when the output has no peers table header
    '''

    state_comments = {
        '#': 'source selected, distance exceeds maximum value',
        'o': 'source selected, Pulse Per Second (PPS) used',
        '+': 'source selected, included in final set',
        'x': 'source false ticker',
        '.': 'source selected from end of candidate list',
        '-': 'source discarded by cluster algorithm',
        '*': 'current time source',
        ' ': 'source discarded high stratum, failed sanity',
    }
    source_types = {
        'l': "local (such as a GPS, WWVB)",
        'u': "unicast (most common)",
        'm': "multicast",
        'b': "broadcast",
        '-': "netaddr",
    }

    # universal_newlines=True yields text on Python 3 as well; with bytes,
    # stdout.split('\n') below would raise TypeError.
    proc = subprocess.Popen(['ntpq'] + params, stdout=subprocess.PIPE, universal_newlines=True)
    stdout = proc.communicate()[0]

    ntp_lines = stdout.split('\n')
    fields = ntp_lines[0].split()
    if 'st' not in fields or 't' not in fields:
        # Empty or unexpected output: there is no header to map columns to.
        logger.error('Unexpected ntpq output, peers table header not found')
        __context__['retcode'] = 2
        return False
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    # ntp_lines[1] is skipped along with the header, as in the table layout
    # the parser was written for.
    for line in ntp_lines[2:]:
        if len(line.strip()) == 0:
            continue

        element = {}
        for field, value in zip(fields, line.split()):
            if field == 'source_type':
                # Type characters outside the map above are reported as
                # 'unknown' instead of raising KeyError.
                element[field] = {'indicator': value, 'comment': source_types.get(value, 'unknown')}
            elif field in ['stratum', 'when', 'poll', 'reach']:
                element[field] = _ntpq_int(value)
            elif field in ['delay', 'offset', 'jitter']:
                element[field] = float(value)
            else:
                element[field] = value

        peer = element.pop('remote')
        peer_state = peer[0]
        if peer_state in state_comments:
            # The tally character is glued to the address; cut it off.
            peer = peer[1:]
        else:
            peer_state = 'f'

        element['current'] = (peer_state == '*')
        if peer_state == 'f':
            # No tally character: reuse the description of the blank one.
            element['state'] = {'indicator': 'f', 'comment': state_comments[' ']}
        else:
            element['state'] = {'indicator': peer_state, 'comment': state_comments[peer_state]}
        ntp_peers[peer] = element

    return ntp_peers
|  | 693 |  | 
|  | 694 |  | 
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify NTP peers status

    Reads health_checks.ntp_status from the targeted minions; a minion
    passes when at least min_peers of its peers have a stratum not higher
    than max_stratum.

    :param min_peers: minimum number of acceptable peers per minion
    :param max_stratum: highest acceptable stratum
    :param target: minion target expression
    :param target_type: targeting type matching the expression
    :param ignore_dead: skip the comparison against the job's minion list
    :param kwargs: 'debug' - when true, log the verified minions
    :return: True when every minion passed, False otherwise
    '''

    agent = "ntpd peers status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.ntp_status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        ntp_json = out[minion]['ret']
        good_peers = [
            peer for peer in ntp_json
            if ntp_json[peer]['stratum'] < int(max_stratum) + 1
        ]
        enough_peers = len(good_peers) > int(min_peers) - 1
        bucket = verified_minions if enough_peers else failed_minions
        if minion not in bucket:
            bucket.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True