blob: 3d1fa9a6f5eb898c1cc4cd04110c368ad664b7bf [file] [log] [blame]
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +02001import requests
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +02002import subprocess
3import socket
4import salt.utils
5import logging
6import os
7import re
8import json
9
# Module metadata.
__author__ = "Dzmitry Stremkouski"
__copyright__ = "Copyright 2019, Mirantis Inc."
__license__ = "Apache 2.0"

# Module-level logger used by every check below; a StreamHandler is attached
# so that check failures are emitted on a stream in addition to whatever
# handlers the root logger provides.
logger = logging.getLogger(__name__)
stream = logging.StreamHandler()
logger.addHandler(stream)
17
18
def _failed_minions(out, agent, failed_minions):

    ''' Report failed minions and flag the run as failed

    Returns True when ``failed_minions`` is empty. Otherwise logs the agent
    name and every failed minion (plus, at debug level, the ``ret`` value
    stored for that minion in ``out``), sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    if len(failed_minions) == 0:
        return True

    logger.error("%s check FAILED" % agent)
    logger.error("Some minions returned non-zero exit code or empty data")
    logger.error("Failed minions:" + str(failed_minions))
    for name in failed_minions:
        logger.error(name)
        logger.debug(str(out[name]['ret']))

    __context__['retcode'] = 2
    return False
34
35
def _minions_output(out, agent, ignore_dead, ignore_empty=False):

    ''' Verify minions output and exit code

    out          -- mapping of minion id to its reply as returned by
                    ``saltutil.cmd`` (each reply is read for the keys
                    'jid', 'retcode' and 'ret').
    agent        -- human readable check name used in log messages.
    ignore_dead  -- when false, the job is looked up through the
                    ``jobs.print_job`` runner and the check fails if fewer
                    minions answered than were targeted.
    ignore_empty -- when false, a boolean or zero-length 'ret' counts as a
                    failure even if the return code is 0.

    Returns True when every minion passed; otherwise sets
    ``__context__['retcode']`` to 2 and returns False.
    '''

    if not out:
        logger.error("%s check FAILED" % agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # The job id is taken from the first reply. next(iter(...)) works on
        # both Python 2 and 3, unlike dict.itervalues().next().
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']('jobs.print_job', arg=[jid]) or None
        if not job_stats:
            logger.error("%s check FAILED" % agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED" % agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        reply = out[minion]
        if 'retcode' not in reply or reply['retcode'] != 0:
            # Missing or non-zero return code.
            failed = True
        elif ignore_empty:
            failed = False
        else:
            # A bare boolean or an empty payload carries no usable data.
            ret = reply['ret']
            failed = isinstance(ret, bool) or len(ret) == 0
        if failed:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    return True
87
88
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):

    ''' Verify minions are online

    Runs ``test.ping`` against the target through ``saltutil.cmd`` and
    validates the replies with ``_minions_output`` (empty payloads are
    accepted for this check).
    '''

    ping_options = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'test.ping',
        'timeout': wait_timeout,
        'gather_job_timeout': gather_job_wait_timeout,
    }
    out = __salt__['saltutil.cmd'](**ping_options) or None

    return _minions_output(out, "Minions", ignore_dead, ignore_empty=True)
102
103
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify time diff on servers

    Collects ``status.time`` (format '%s') from the target and compares the
    smallest and the largest value. When the spread exceeds ``time_diff``
    the return code is set to 2 and False is returned; with ``debug=True``
    the failure result is the tuple ``(False, minions_times)`` where
    ``minions_times`` maps each reported time (as a string) to the minions
    that reported it.
    '''

    agent = "Time diff"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.time',
        arg=['%s'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    debug = kwargs.get("debug", False)
    minions_times = {}
    env_times = []
    verified_minions = []

    for minion in out:
        reply = out[minion]
        verified_minions.append(minion)
        if reply['retcode'] != 0:
            continue
        stamp = int(reply['ret'])
        minions_times.setdefault(str(stamp), []).append(minion)
        env_times.append(stamp)

    ordered = sorted(env_times)
    spread = ordered[-1] - ordered[0]

    if spread > time_diff:
        __context__['retcode'] = 2
        if debug:
            return False, minions_times
        return False

    if debug:
        logger.info(verified_minions)
    return True
146
147
def contrail_check(target='I@contrail:control or I@contrail:collector or I@opencontrail:compute or I@opencontrail:client', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify contrail status returns nothing critical

    Runs ``contrail-status`` on the target through ``cmd.run``. A minion
    fails when any output line is neither empty, nor starts with "==", nor
    is a "<name> <state>" line whose state starts with active, backup or
    "inactive (disabled on boot)". With ``debug=True`` the list of validated
    minions is logged on success.

    Returns True on success; otherwise sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    agent = "Contrail status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='cmd.run',
        arg=['contrail-status'],
        timeout=5
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # Raw string: the pattern relies on regex escapes that are invalid
    # string escapes in a normal literal.
    prog = re.compile(r'^(==|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))')

    failed_minions = []
    validated = []
    for minion in out:
        for line in out[minion]['ret'].split('\n'):
            if not prog.match(line) and minion not in failed_minions:
                failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
    return True
182
183
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify galera cluster size and state

    Reads ``mysql.status`` from every targeted minion. A minion fails when
    its ``wsrep_cluster_size`` differs from ``cluster_size`` or its
    ``wsrep_evs_state`` is not 'OPERATIONAL'. With ``debug=True`` the
    validated minions and the size/state reported by the first of them are
    logged on success.
    '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='mysql.status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        wsrep = out[minion]['ret']
        size_matches = int(wsrep['wsrep_cluster_size']) == int(cluster_size)
        operational = wsrep['wsrep_evs_state'] == 'OPERATIONAL'
        if not (size_matches and operational) and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        first = out[validated[0]]['ret']
        logger.info(validated)
        logger.info("Cluster size: " + str(first['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(first['wsrep_evs_state']))
    return True
218
219
def _quote_str(s, l=False, r=False):

    ''' Quoting rabbitmq erl objects for json import

    s -- a single token cut out of the rabbitmqctl output.
    l -- the token had leading brackets/whitespace removed; leading
         whitespace is stripped here as well.
    r -- the token had trailing brackets/whitespace removed; trailing
         whitespace is stripped here as well.

    Returns the token wrapped in double quotes (single quotes are converted
    to double quotes, a missing opening/closing quote is completed when only
    one side was trimmed). An empty token is returned unchanged.
    '''

    if len(s) == 0:
        return s

    if l:
        s = s.lstrip()
    if r:
        s = s.rstrip()

    # A whitespace-only token is empty after stripping; indexing s[0] below
    # would raise IndexError, so there is nothing to quote.
    if len(s) == 0:
        return s

    # Complete a quote that was cut off on the untrimmed side.
    if (s[0] == "'") and (s[-1] != "'") and r and not l:
        s += "'"
    if (s[0] == '"') and (s[-1] != '"') and r and not l:
        s += '"'
    if (s[-1] == "'") and (s[0] != "'") and l and not r:
        s = "'" + s
    if (s[-1] == '"') and (s[0] != '"') and l and not r:
        s = '"' + s

    if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
        # Completely unquoted token: escape embedded quotes and wrap it.
        s = '"' + s.replace('"', '\\"') + '"'
    else:
        # NOTE(review): "not s[-1] != '"'" means the token ends with a double
        # quote; confirm this double negative is intended.
        if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
            s = s.replace('"', '\\"')

    return s.replace("'", '"')
245
246
def _sanitize_rmqctl_output(string):

    ''' Sanitizing rabbitmq erl objects for json import

    The input is split on commas. For every piece the leading run of
    "[", "{" and whitespace and the trailing run of "]", "}" and whitespace
    are set aside, the remaining token is quoted with ``_quote_str`` and the
    piece is re-assembled. A piece that directly follows an opening "{" and
    has no trailing bracket is treated as a key and terminated with ":",
    every other piece with ",". Finally, values that still contain double
    quotes inside their quotes are escaped and re-wrapped.

    Returns the resulting JSON-like text.
    '''

    pieces = []
    for line in string.split(','):
        copy = line
        left = ""
        right = ""
        mid = copy
        lpar = False
        rpar = False
        if re.search(r'([\[\{\s]+)(.*)', copy):
            mid = re.sub(r'^([\[\{\s]+)', '', copy)
            # Slice by explicit length: copy[:-len(mid)] would give "" (not
            # the whole prefix) when nothing is left after the brackets.
            left = copy[:len(copy) - len(mid)]
            copy = mid
            lpar = True
        if re.search(r'(.*)([\]\}\s]+)$', copy):
            mid = re.sub(r'([\]\}\s]+)$', '', copy)
            right = copy[len(mid):]
            copy = mid
            rpar = True
        result = left + _quote_str(mid, l=lpar, r=rpar) + right
        opening = left.strip()
        if (not rpar) and lpar and (len(opening) > 0) and (opening[-1] == '{'):
            result += ":"
        else:
            result += ","
        pieces.append(result)

    # Drop the delimiter appended after the last piece.
    rabbitctl_json = "".join(pieces)[:-1]
    new_rabbitctl_json = rabbitctl_json
    for value in re.findall(r'"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in value[1:][:-1]:
            changed = '"' + value.replace('\\', '\\\\').replace('"', '\\"') + '"'
            new_rabbitctl_json = new_rabbitctl_json.replace(value, changed)
    return new_rabbitctl_json
284
285
def rabbitmq_list_queues(vhost='/'):

    ''' JSON formatted RabbitMQ queues list

    Runs ``rabbitmqctl list_queues -p <vhost>`` and returns a dict mapping
    queue name to the integer found in the last column. Only output lines
    that end with a digit are considered.
    '''

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3, so the str-based parsing below keeps working.
    proc = subprocess.Popen(['rabbitmqctl', 'list_queues', '-p', vhost],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    queues = {}
    for line in stdout.split('\n'):
        if not re.search('[0-9]$', line):
            continue
        # Split on the last whitespace run only, so a queue name containing
        # whitespace does not break the unpacking.
        columns = line.rsplit(None, 1)
        if len(columns) != 2:
            continue
        queue_name, num = columns
        queues[queue_name] = int(num)

    return queues
300
301
def rabbitmq_list_vhosts():

    ''' JSON formatted RabbitMQ vhosts list

    Runs ``rabbitmqctl list_vhosts`` and returns the output lines that start
    with "/" as a list.
    '''

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3.
    proc = subprocess.Popen(['rabbitmqctl', 'list_vhosts'],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    return [line for line in stdout.split('\n') if line.startswith('/')]
315
316
def rabbitmq_cmd(cmd):

    ''' JSON formatted RabbitMQ command output

    cmd -- one of 'status', 'cluster_status', 'list_hashes', 'list_ciphers'.

    Runs ``rabbitmqctl <cmd>``, keeps the text between the first "[" and the
    last "]" of its output, converts it with ``_sanitize_rmqctl_output`` and
    returns the parsed JSON. For an unsupported command an error is logged,
    ``__context__['retcode']`` is set to 2 and False is returned.
    '''

    supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
    if cmd not in supported_commands:
        logger.error("Command is not supported yet, sorry")
        logger.error("Supported commands are: " + str(supported_commands))
        __context__['retcode'] = 2
        return False

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3, which the str operations below require.
    proc = subprocess.Popen(['rabbitmqctl', cmd],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    start = stdout.find('[')
    end = stdout.rfind(']') + 1
    rabbitmqctl_cutoff = stdout[start:end].replace('\n', '')
    return json.loads(_sanitize_rmqctl_output(rabbitmqctl_cutoff))
333
334
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify rabbit cluster and it's alarms

    Collects ``health_checks.rabbitmq_cmd cluster_status`` from the target.
    A minion fails when its list of running nodes differs from the list of
    disc nodes, or when any running node reports a non-empty alarm list.
    With ``debug=True`` the running nodes of the last processed minion are
    logged on success.

    Returns True on success; otherwise sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.rabbitmq_cmd',
        arg=['cluster_status'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # list.sort() returns None, so comparing the results of two sort()
        # calls is always true; compare sorted copies instead.
        running_nodes = sorted(running_nodes)
        if running_nodes != sorted(available_nodes):
            failed_minions.append(minion)
            continue

        nodes_alarms = [
            el[node]
            for node in running_nodes
            for el in alarms
            if node in el and len(el[node]) > 0
        ]
        if nodes_alarms:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(running_nodes)
    return True
385
386
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):

    ''' JSON formatted haproxy status

    socket_path  -- path of the haproxy admin unix socket.
    buff_size    -- number of bytes requested per recv() call.
    encoding     -- encoding used for the command and for the reply.
    stats_filter -- when non-empty, only these columns are kept for every
                    entry (the argument is only read, never modified).

    Sends "show stat" to the socket and returns a dict keyed by proxy name
    ('pxname'). Rows whose 'svname' is FRONTEND or BACKEND are stored under
    that key; every other row is stored under 'UPSTREAM' keyed by its
    'svname'. When the socket path does not exist an error is logged,
    ``__context__['retcode']`` is set to 2 and False is returned.
    '''

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    try:
        client.connect(socket_path)
        client.sendall(bytearray('show stat\n', encoding))
        chunks = []
        while True:
            chunk = client.recv(buff_size)
            if not chunk:
                break
            chunks.append(chunk)
    finally:
        # Always release the socket, also when connect/send/recv raises.
        client.close()

    # Decode once: decoding chunk by chunk could split a multi-byte character.
    res = b''.join(chunks).decode(encoding)

    res_list = res.split('\n')
    # The first two characters of the header line are skipped before the
    # column names are read.
    fields = res_list[0][2:].split(',')

    haproxy_stats = {}
    for line in res_list[1:]:
        if not line.strip():
            continue
        element = dict(zip(fields, line.split(',')))
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            element = dict((key, value) for key, value in element.items()
                           if key in stats_filter)
        proxy = haproxy_stats.setdefault(server_name, {})
        if server_type in ("FRONTEND", "BACKEND"):
            proxy[server_type] = element
        else:
            proxy.setdefault('UPSTREAM', {})[server_type] = element

    return haproxy_stats
441
442
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):

    ''' Verify haproxy backends status

    Collects ``health_checks.haproxy_status`` (status column only) from the
    target. For every service not listed in ``ignore_services`` a minion
    fails when the FRONTEND status is not 'OPEN', the BACKEND status is not
    'UP', an upstream not listed in ``ignore_upstreams`` is not 'UP', or the
    service has no upstreams while ``ignore_no_upstream`` is false.
    '''

    agent = "haproxy status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.haproxy_status',
        arg=["stats_filter=['status']"],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        haproxy_json = out[minion]['ret']
        broken = False
        for service in haproxy_json:
            if service in ignore_services:
                continue
            section = haproxy_json[service]
            if section['FRONTEND']['status'] != 'OPEN':
                broken = True
            if section['BACKEND']['status'] != 'UP':
                broken = True
            if 'UPSTREAM' not in section:
                if not ignore_no_upstream:
                    broken = True
                continue
            upstreams = section['UPSTREAM']
            for upstream in upstreams:
                if upstream in ignore_upstreams:
                    continue
                if upstreams[upstream]['status'] != 'UP':
                    broken = True
        if broken and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
490
491
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):

    ''' Verify storage space/inodes status

    verify -- 'space' (uses ``disk.usage`` / 'capacity' / ``space_limit``)
              or 'inodes' (uses ``disk.inodeusage`` / 'use' /
              ``inode_limit``).

    A minion fails when, for any partition not listed in
    ``ignore_partitions``, the reported value (with its last character
    removed) is greater than the limit.
    '''

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    # verify mode -> (salt function, key of the usage value, limit)
    modes = {
        'space': ('disk.usage', 'capacity', space_limit),
        'inodes': ('disk.inodeusage', 'use', inode_limit),
    }
    fun_cmd, json_arg, limit = modes[verify]

    agent = "df status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun=fun_cmd,
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        df_json = out[minion]['ret']
        over_limit = False
        for disk in df_json:
            if disk in ignore_partitions:
                continue
            if int(df_json[disk][json_arg][:-1]) > int(limit):
                over_limit = True
        if over_limit and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
541
542
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):

    ''' Verify load average status

    Collects ``status.loadavg`` from the target. A minion fails when its
    '1-min', '5-min' or '15-min' value is greater than ``la1``, ``la5`` or
    ``la15`` respectively.
    '''

    agent = "load average status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.loadavg',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        la_json = out[minion]['ret']
        overloaded = False
        if float(la_json['1-min']) > float(la1):
            overloaded = True
        if float(la_json['5-min']) > float(la5):
            overloaded = True
        if float(la_json['15-min']) > float(la15):
            overloaded = True
        if overloaded and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
580
581
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):

    ''' Verify netdev rx/tx drop status

    Collects ``status.netdev`` from the target. A minion fails when, for any
    device not listed in ``ignore_devices``, 'rx_drop' is greater than
    ``rx_drop_limit`` or 'tx_drop' is greater than ``tx_drop_limit``.
    '''

    agent = "netdev rx/tx status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.netdev',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        dev_json = out[minion]['ret']
        dropping = False
        for netdev in dev_json:
            if netdev in ignore_devices:
                continue
            counters = dev_json[netdev]
            if int(counters['rx_drop']) > int(rx_drop_limit):
                dropping = True
            if int(counters['tx_drop']) > int(tx_drop_limit):
                dropping = True
        if dropping and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
618
619
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):

    ''' Verify available memory status

    Collects ``status.meminfo`` from the target and computes the used memory
    percentage from 'MemTotal' and 'MemAvailable'. A minion fails when that
    percentage is greater than ``used_limit``. With ``debug=True`` the
    percentage of every passing minion is logged on success.

    Returns True on success; otherwise sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.meminfo',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        meminfo = out[minion]['ret']
        mem_avail = int(meminfo['MemAvailable']['value'])
        mem_total = int(meminfo['MemTotal']['value'])
        # 100.0 forces float division; integer division (Python 2) would
        # truncate e.g. 80.9% to 80% and let it pass an 80% limit.
        used_pct = (mem_total - mem_avail) * 100.0 / mem_total
        if used_pct > float(used_limit):
            if minion not in failed_minions:
                failed_minions.append(minion)
        else:
            verified_minions.append({minion: '%.2f%%' % used_pct})

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
654
655
def ntp_status(params = ['-4', '-p', '-n']):

    ''' JSON formatted ntpq command output

    params -- command line arguments passed to ``ntpq`` (only read, never
              modified).

    Returns a dict keyed by peer name (the 'remote' column without its
    leading state character). Every value holds the remaining columns
    ('st' is renamed to 'stratum', 't' to 'source_type'), a 'current' flag
    that is True for the peer marked with "*", and a 'state' dict with the
    'indicator' character and its 'comment'. Peers whose first character is
    not a known indicator get the indicator 'f'.
    '''

    ntp_states = [
        { 'indicator': '#', 'comment': 'source selected, distance exceeds maximum value' },
        { 'indicator': 'o', 'comment': 'source selected, Pulse Per Second (PPS) used' },
        { 'indicator': '+', 'comment': 'source selected, included in final set' },
        { 'indicator': 'x', 'comment': 'source false ticker' },
        { 'indicator': '.', 'comment': 'source selected from end of candidate list' },
        { 'indicator': '-', 'comment': 'source discarded by cluster algorithm' },
        { 'indicator': '*', 'comment': 'current time source' },
        { 'indicator': ' ', 'comment': 'source discarded high stratum, failed sanity' }
    ]
    states_by_indicator = dict((state['indicator'], state) for state in ntp_states)
    source_types = {
        'l': "local (such as a GPS, WWVB)",
        'u': "unicast (most common)",
        'm': "multicast",
        'b': "broadcast",
        '-': "netaddr",
    }
    int_fields = ('stratum', 'when', 'poll', 'reach')
    float_fields = ('delay', 'offset', 'jitter')

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3, which the str-based parsing below requires.
    proc = subprocess.Popen(['ntpq'] + list(params),
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    ntp_lines = stdout.split('\n')
    fields = ntp_lines[0].split()
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    # The first line holds the column names and the second line is skipped.
    for line in ntp_lines[2:]:
        if not line.strip():
            continue
        element = {}
        for field, value in zip(fields, line.split()):
            if field == 'source_type':
                # Unknown type letters are reported instead of raising KeyError.
                element[field] = { 'indicator': value, 'comment': source_types.get(value, 'unknown') }
            elif field in int_fields:
                # NOTE(review): int() fails if ntpq prints a unit suffix in
                # the 'when' column -- confirm against real ntpq output.
                element[field] = -1 if value == '-' else int(value)
            elif field in float_fields:
                element[field] = float(value)
            else:
                element[field] = value

        peer = element.pop('remote')
        peer_state = peer[0]
        if peer_state in states_by_indicator:
            peer = peer[1:]
            element['state'] = states_by_indicator[peer_state].copy()
        else:
            # No known state character: report the "failed sanity" comment
            # with the indicator 'f'.
            peer_state = 'f'
            fail_state = states_by_indicator[' '].copy()
            fail_state['indicator'] = 'f'
            element['state'] = fail_state
        element['current'] = (peer_state == '*')
        ntp_peers[peer] = element

    return ntp_peers
725
726
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify NTP peers status

    Collects ``health_checks.ntp_status`` from the target. A peer is good
    when its 'stratum' is lower than ``max_stratum`` + 1; a minion passes
    when it has more than ``min_peers`` - 1 good peers and fails otherwise.
    '''

    agent = "ntpd peers status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.ntp_status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        ntp_json = out[minion]['ret']
        good_peers = [
            peer for peer in ntp_json
            if ntp_json[peer]['stratum'] < int(max_stratum) + 1
        ]
        if len(good_peers) > int(min_peers) - 1:
            bucket = verified_minions
        else:
            bucket = failed_minions
        if minion not in bucket:
            bucket.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +0200765
766
def gluster_pool_list():

    ''' JSON formatted GlusterFS pool list command output

    Runs ``gluster pool list``. The three column names of the first output
    line are lower-cased and used as keys; every following non-empty line
    becomes a dict of the remaining columns, stored under the value of its
    'uuid' column.
    '''

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3, which the str-based parsing below requires.
    proc = subprocess.Popen(['gluster', 'pool', 'list'],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    regex = re.compile(r'^(\S+)\s+(\S+)\s+(\S+)$')
    lines = stdout.split('\n')
    fields = [name.lower() for name in regex.findall(lines[0])[0]]

    pool = {}
    for line in lines[1:]:
        if not line.strip():
            continue
        values = regex.findall(line.strip())[0]
        peer = dict(zip(fields, values))
        uuid = peer.pop('uuid')
        pool[uuid] = peer

    return pool
789
790
def gluster_volume_status():

    ''' JSON formatted GlusterFS volumes status command output

    Runs ``gluster volume status all detail`` and returns a dict keyed by
    volume name. Every volume holds a 'bricks' list; each brick is a dict
    with 'host' and 'path' taken from its "Brick" line plus one entry per
    following "key : value" line (key lower-cased, spaces replaced by "_").
    '''

    # universal_newlines=True makes communicate() return text on both
    # Python 2 and 3, which the str-based parsing below requires.
    proc = subprocess.Popen(['gluster', 'volume', 'status', 'all', 'detail'],
                            stdout=subprocess.PIPE,
                            universal_newlines=True)
    stdout, _ = proc.communicate()

    brick_line = re.compile(r'^Brick *: (.*)')
    brick_lookup = False
    volumes = {}
    volume_name = ""

    for line in stdout.split('\n'):
        if 'Status of volume' in line:
            volume_name = line.split(':')[1].strip()
            volumes[volume_name] = { 'bricks': [] }
        elif len(line.strip()) == 0:
            # Blank lines carry no data.
            continue
        elif '--------' in line:
            # A separator line announces the next brick.
            brick_lookup = True
        elif brick_lookup and line.split(':')[0].strip() == 'Brick':
            # The captured value is split on whitespace; its second word is
            # "<host>:<path>".
            brick_host, brick_path = brick_line.findall(line)[0].split()[1].split(':')
            volumes[volume_name]['bricks'].append({ 'host': brick_host, 'path': brick_path })
            brick_lookup = False
        else:
            # Split on the first colon only so values containing ":" survive.
            brick_key, brick_value = line.split(':', 1)
            brick_key = brick_key.strip().lower().replace(' ', '_')
            volumes[volume_name]['bricks'][-1][brick_key] = brick_value.strip()

    return volumes
824
825
def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):

    ''' Check GlusterFS peer status

    Collects ``health_checks.gluster_pool_list`` from the target. A minion
    fails when any peer is not in state 'Connected' or when fewer than
    ``expected_size`` peers are connected.
    '''

    agent = "glusterfs peer status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.gluster_pool_list',
        timeout=3,
        kwargs='[batch=True]'
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        connected = 0
        degraded = False
        for peer in gluster_json:
            if gluster_json[peer]['state'] == 'Connected':
                connected += 1
            else:
                degraded = True
        if connected < expected_size:
            degraded = True
        if degraded and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
866
867
def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=[], ignore_dead=False, **kwargs):

    ''' Check GlusterFS volumes status

    Collects ``health_checks.gluster_volume_status`` from the target. For
    every volume not listed in ``ignore_volumes`` a minion fails when the
    volume has no 'bricks' entry, has fewer than ``expected_size`` bricks,
    has a brick whose 'online' value is not 'Y', or has fewer than
    ``expected_size`` online bricks. With ``debug=True`` the verified
    minions and volumes are logged on success.

    Returns True on success; otherwise sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    agent = "glusterfs volumes status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.gluster_volume_status',
        timeout=3,
        kwargs='[batch=True]'
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    verified_volumes = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        unhealthy = False
        for volume in gluster_json:
            if volume in ignore_volumes:
                continue
            verified_volumes.append(volume)

            if 'bricks' not in gluster_json[volume]:
                # Report the volume as failed instead of raising KeyError on
                # the missing key.
                unhealthy = True
                continue

            bricks = gluster_json[volume]['bricks']
            if len(bricks) < expected_size:
                unhealthy = True

            alive_bricks = 0
            for brick in bricks:
                if brick['online'] == 'Y':
                    alive_bricks += 1
                else:
                    unhealthy = True
            if alive_bricks < expected_size:
                unhealthy = True

        if unhealthy and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Verified minions:")
        logger.info(verified_minions)
        logger.info("Verified volumes:")
        logger.info(verified_volumes)

    return True
924
925
def ceph_cmd(cmd):

    ''' JSON formatted ceph command output

    Splits ``cmd`` on whitespace, runs ``ceph <cmd> --format json-pretty``
    and returns its standard output parsed as JSON.
    '''

    argv = ['ceph']
    argv.extend(cmd.split())
    argv.extend(['--format', 'json-pretty'])

    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    output = proc.communicate()[0]

    return json.loads(output)
934
935
def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):

    ''' Check all ceph monitors health status

    Collects ``health_checks.ceph_cmd status`` from the target. A minion
    fails when any of the following holds for its report:
      * the overall status differs from ``expected_status``;
      * the osdmap is flagged full or nearfull;
      * the total, "in" and "up" OSD counts are not all equal;
      * the sizes of quorum, quorum_names and the monitor map differ;
      * a monitor in the time checks or in the health services reports a
        health other than ``expected_status``;
      * a placement group state differs from ``expected_state``.
    With ``debug=True`` the quorum names of the last processed minion and
    the verified minions are logged on success.
    '''

    agent = "ceph health status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.ceph_cmd',
        arg=['status'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        ceph_json = out[minion]['ret']
        fsid = ceph_json['fsid']
        unhealthy = False

        if ceph_json['health']['overall_status'] != expected_status:
            unhealthy = True

        if ceph_json['osdmap']['osdmap']['full']:
            unhealthy = True

        if ceph_json['osdmap']['osdmap']['nearfull']:
            unhealthy = True

        osd_total = ceph_json['osdmap']['osdmap']['num_osds']
        osd_in = ceph_json['osdmap']['osdmap']['num_in_osds']
        osd_up = ceph_json['osdmap']['osdmap']['num_up_osds']
        if osd_total != osd_in or osd_in != osd_up:
            unhealthy = True

        quorum_size = len(ceph_json['quorum'])
        quorum_names_size = len(ceph_json['quorum_names'])
        mons_size = len(ceph_json['monmap']['mons'])
        if quorum_size != quorum_names_size or quorum_names_size != mons_size:
            unhealthy = True

        for mon in ceph_json['health']['timechecks']['mons']:
            if mon['health'] != expected_status:
                unhealthy = True

        for srv in ceph_json['health']['health']['health_services']:
            for mon in srv['mons']:
                if mon['health'] != expected_status:
                    unhealthy = True

        for state in ceph_json['pgmap']['pgs_by_state']:
            if state['state_name'] != expected_state:
                unhealthy = True

        if unhealthy and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Quorum:")
        logger.info(ceph_json['quorum_names'])
        logger.info("Verified minions:")
        logger.info(verified_minions)

    return True
1012
1013
def get_entropy():

    ''' Retrieve entropy size for the host

    Returns the raw content of /proc/sys/kernel/random/entropy_avail as a
    string (no stripping or conversion is applied).
    '''

    entropy_file = '/proc/sys/kernel/random/entropy_avail'
    with open(entropy_file, 'r') as handle:
        return handle.read()
1021
1022
def entropy_check(target='*', target_type='glob', minimum_bits=700, ignore_dead=False, **kwargs):

    ''' Check entropy size in cluster

    Collects ``health_checks.get_entropy`` from the target. A minion fails
    when the reported value is lower than ``minimum_bits``. With
    ``debug=True`` the verified minions are logged on success.

    Returns True on success; otherwise sets ``__context__['retcode']`` to 2
    and returns False.
    '''

    agent = "entropy size status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.get_entropy',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # Raw replies go to the debug log; the former "print out" statement
    # polluted stdout and is a syntax error on Python 3.
    logger.debug(out)

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        entropy = int(out[minion]['ret'])
        if entropy < int(minimum_bits):
            if minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1057
1058
def docker_registry_list(host):

    ''' Retrieve and list docker catalog

    host -- registry address; "http://" is prepended unless the value
            already starts with "http".

    Requests ``<host>/v2/_catalog`` and, for every repository listed there,
    ``<host>/v2/<repo>/tags/list``. Returns a dict mapping repository name
    to its 'tags' value. This is best effort: on any error the problem is
    logged and an empty dict is returned.
    '''

    try:
        if host[0:4] == 'http':
            url = host + '/v2/'
        else:
            url = 'http://' + host + '/v2/'
        repos = requests.get(url + '_catalog')

        versions = {}
        for repo in repos.json()['repositories']:
            repo_versions = requests.get(url + repo + '/tags/list')
            versions[repo] = repo_versions.json().pop('tags')
        return versions
    except Exception as exc:
        # Keep the best-effort contract, but do not swallow
        # KeyboardInterrupt/SystemExit and do not hide the cause.
        logger.error("Unable to list docker registry %s: %s" % (host, exc))
        return {}
Dzmitry Stremkouski7cd10fc2019-04-17 11:51:59 +02001077
1078
def docker_ps(list_all=0):

    ''' List docker containers

    Connects to the docker daemon through unix://var/run/docker.sock and
    returns the result of ``containers(all=list_all)``.
    '''

    # Imported here so the module does not require docker unless this
    # function is called.
    import docker

    docker_client = docker.client.Client(base_url='unix://var/run/docker.sock')
    containers = docker_client.containers(all=list_all)
    return containers
1084