blob: ad4334317a85f884c1af1d7c947bf80d3df9c9eb [file] [log] [blame]
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +02001import requests
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +02002import subprocess
3import socket
4import salt.utils
5import logging
6import os
7import re
8import json
9
10__author__ = "Dzmitry Stremkouski"
11__copyright__ = "Copyright 2019, Mirantis Inc."
12__license__ = "Apache 2.0"
13
# Module-level logger with a stream handler attached at import time.
stream = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.addHandler(stream)
17
18
19def _failed_minions(out, agent, failed_minions):
20
21 ''' Verify failed minions '''
22
23 if len(failed_minions) > 0:
24 logger.error("%s check FAILED" % agent)
25 logger.error("Some minions returned non-zero exit code or empty data")
26 logger.error("Failed minions:" + str(failed_minions))
27 for minion in failed_minions:
28 logger.error(minion)
29 logger.debug(str(out[minion]['ret']))
30 __context__['retcode'] = 2
31 return False
32
33 return True
34
35
def _minions_output(out, agent, ignore_dead, ignore_empty=False):
    '''
    Verify minions output and exit code.

    :param out: result of ``saltutil.cmd``, mapping of minion id to a dict
                holding ``jid``, ``retcode`` and ``ret``
    :param agent: human readable name of the check, used in log messages
    :param ignore_dead: when False, the job is looked up on the master and
                        the check fails if some targeted minions did not
                        return
    :param ignore_empty: when False, a boolean, ``None`` or empty ``ret``
                         counts as a failure
    :return: True when every minion passed, False otherwise (the retcode in
             ``__context__`` is set to 2 on failure)
    '''

    if not out:
        logger.error("%s check FAILED", agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # next(iter(...)) works on Python 2 and 3, unlike dict.itervalues()
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']('jobs.print_job', arg=[jid]) or None
        if not job_stats:
            logger.error("%s check FAILED", agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED", agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        data = out[minion]
        if 'retcode' not in data or data['retcode'] != 0:
            failed_minions.append(minion)
            continue
        if ignore_empty:
            continue
        ret = data.get('ret')
        if ret is None or isinstance(ret, bool):
            failed_minions.append(minion)
        elif hasattr(ret, '__len__') and len(ret) == 0:
            # Unsized returns (e.g. numbers) are considered non-empty.
            failed_minions.append(minion)

    # _failed_minions logs the failures and sets the retcode itself.
    return _failed_minions(out, agent, failed_minions)
87
88
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):
    '''
    Verify minions are online.

    Runs ``test.ping`` on the target and validates the answers with
    ``_minions_output`` (empty returns are tolerated).
    '''

    ping_request = {
        'tgt': target,
        'tgt_type': target_type,
        'fun': 'test.ping',
        'timeout': wait_timeout,
        'gather_job_timeout': gather_job_wait_timeout,
    }
    answers = __salt__['saltutil.cmd'](**ping_request) or None

    return _minions_output(answers, "Minions", ignore_dead, ignore_empty=True)
102
103
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):
    '''
    Verify time diff on servers.

    Runs ``status.time`` with the ``%s`` format on the target and compares
    the largest and the smallest integer value returned.

    :return: True when the spread does not exceed ``time_diff``. Otherwise
             the retcode is set to 2 and False is returned, or the tuple
             ``(False, {time: [minions]})`` when the ``debug`` kwarg is set.
    '''

    agent = "Time diff"
    request = dict(tgt=target, tgt_type=target_type, fun='status.time', arg=['%s'], timeout=3)
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    debug = kwargs.get("debug", False)
    minions_times = {}
    env_times = []
    verified_minions = []

    for minion, data in out.items():
        verified_minions.append(minion)
        if data['retcode'] != 0:
            continue
        stamp = int(data['ret'])
        minions_times.setdefault(str(stamp), []).append(minion)
        env_times.append(stamp)

    ordered = sorted(env_times)
    spread = ordered[-1] - ordered[0]

    if spread > time_diff:
        __context__['retcode'] = 2
        if debug:
            return False, minions_times
        return False

    if debug:
        logger.info(verified_minions)
    return True
146
147
def contrail_check(target='I@opencontrail:control or I@opencontrail:collector or I@opencontrail:compute', target_type='compound', ignore_dead=False, **kwargs):
    '''
    Verify contrail status returns nothing critical.

    Runs ``contrail-status`` through ``cmd.run`` on the target. A minion
    fails when any output line is neither a ``==`` header, an empty line,
    nor a service reported as ``active``, ``backup`` or
    ``inactive (disabled on boot)``.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "Contrail status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='cmd.run',
                                   arg=['contrail-status'],
                                   timeout=5
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # Raw literal: the pattern contains regex escapes that are invalid
    # string escapes in a normal literal.
    prog = re.compile(r'^(==|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))')

    failed_minions = []
    validated = []
    for minion in out:
        lines = out[minion]['ret'].split('\n')
        # all() stops at the first offending line
        if not all(prog.match(line) for line in lines):
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
    return True
182
183
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):
    '''
    Verify galera cluster size and state.

    Runs ``mysql.status`` on the target. A minion fails when its
    ``wsrep_cluster_size`` differs from ``cluster_size``, when its
    ``wsrep_evs_state`` is not ``OPERATIONAL``, or when either value is
    missing or not usable.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='mysql.status',
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    expected_size = int(cluster_size)
    failed_minions = []
    validated = []
    for minion in out:
        status = out[minion]['ret']
        try:
            healthy = (int(status['wsrep_cluster_size']) == expected_size
                       and status['wsrep_evs_state'] == 'OPERATIONAL')
        except (KeyError, TypeError, ValueError):
            # Status without usable wsrep_* values: fail the minion
            # instead of aborting the whole check.
            healthy = False
        if not healthy:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
        logger.info("Cluster size: " + str(out[validated[0]]['ret']['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(out[validated[0]]['ret']['wsrep_evs_state']))
    return True
218
219
220def _quote_str(s, l=False, r=False):
221
222 ''' Quting rabbitmq erl objects for json import '''
223
224 if len(s) > 0:
225 if l:
226 s = s.lstrip()
227 if r:
228 s = s.rstrip()
229 if (s[0] == "'") and (s[-1] != "'") and r and not l:
230 s += "'"
231 if (s[0] == '"') and (s[-1] != '"') and r and not l:
232 s += '"'
233 if (s[-1] == "'") and (s[0] != "'") and l and not r:
234 s = "'" + s
235 if (s[-1] == '"') and (s[0] != '"') and l and not r:
236 s = '"' + s
237 if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
238 s = '"' + s.replace('"', '\\\"') + '"'
239 else:
240 if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
241 s= s.replace('"', '\\\"')
242 return s.replace("'", '"')
243 else:
244 return s
245
246
def _sanitize_rmqctl_output(string):
    '''
    Sanitize rabbitmqctl (Erlang term) output for JSON import.

    The input is split on commas. For every piece the leading run of
    ``[``, ``{`` and whitespace and the trailing run of ``]``, ``}`` and
    whitespace are kept as is, while the middle part is quoted with
    ``_quote_str``. A piece directly following an opening ``{`` is treated
    as a key and joined with ``:``, all others are joined with ``,``.
    Finally, string values that still contain double quotes are escaped.

    :param string: rabbitmqctl output, expected to be on a single line
    :return: text intended to be parsed with ``json.loads``
    '''

    pieces = []
    for chunk in string.split(','):
        left = ""
        right = ""
        mid = chunk
        lpar = False
        rpar = False
        if re.search(r'([\[\{\s]+)(.*)', mid):
            remainder = re.sub(r'^([\[\{\s]+)', '', mid)
            # Slice from the front: "mid[:-len(remainder)]" would yield an
            # empty prefix (dropping the brackets) when nothing remains.
            left = mid[:len(mid) - len(remainder)]
            mid = remainder
            lpar = True
        if re.search(r'(.*)([\]\}\s]+)$', mid):
            remainder = re.sub(r'([\]\}\s]+)$', '', mid)
            right = mid[len(remainder):]
            mid = remainder
            rpar = True
        result = left + _quote_str(mid, l=lpar, r=rpar) + right
        opener = left.strip()
        if (not rpar) and lpar and opener and opener[-1] == '{':
            result += ":"
        else:
            result += ","
        pieces.append(result)

    # Drop the separator appended after the last piece.
    rabbitctl_json = "".join(pieces)[:-1]

    sanitized = rabbitctl_json
    for value in re.findall(r'"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in value[1:-1]:
            escaped = '"' + value.replace('\\', '\\\\').replace('"', '\\"') + '"'
            sanitized = sanitized.replace(value, escaped)
    return sanitized
284
285
def rabbitmq_list_queues(vhost='/'):
    '''
    JSON formatted RabbitMQ queues list.

    Runs ``rabbitmqctl list_queues -p <vhost>`` and parses every line made
    of exactly a name and a number.

    :param vhost: RabbitMQ virtual host to list
    :return: dict mapping queue name to the integer reported next to it;
             lines of any other shape are skipped
    '''

    # universal_newlines makes communicate() return text on Python 3 too.
    proc = subprocess.Popen(['rabbitmqctl', 'list_queues', '-p', vhost],
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    row = re.compile(r'^\s*(\S+)\s+([0-9]+)$')
    queues = {}
    for line in stdout.split('\n'):
        match = row.match(line)
        if match:
            queues[match.group(1)] = int(match.group(2))

    return queues
300
301
def rabbitmq_list_vhosts():
    '''
    JSON formatted RabbitMQ vhosts list.

    Runs ``rabbitmqctl list_vhosts``.

    :return: list of the output lines that start with ``/``
    '''

    # universal_newlines makes communicate() return text on Python 3 too.
    proc = subprocess.Popen(['rabbitmqctl', 'list_vhosts'],
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    return [line for line in stdout.split('\n') if line.startswith('/')]
315
316
def rabbitmq_cmd(cmd):
    '''
    JSON formatted RabbitMQ command output.

    :param cmd: rabbitmqctl sub-command; one of ``status``,
                ``cluster_status``, ``list_hashes``, ``list_ciphers``
    :return: the parsed output, or False (retcode set to 2) when the
             command is not supported
    '''

    supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
    if cmd not in supported_commands:
        logger.error("Command is not supported yet, sorry")
        logger.error("Supported commands are: " + str(supported_commands))
        __context__['retcode'] = 2
        return False

    # universal_newlines makes communicate() return text on Python 3 too,
    # which the str based slicing below requires.
    proc = subprocess.Popen(['rabbitmqctl', cmd],
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    # Keep only the Erlang term: first '[' up to the last ']', on one line.
    start = stdout.find('[')
    end = stdout.rfind(']') + 1
    rabbitmqctl_cutoff = stdout[start:end].replace('\n', '')
    return json.loads(_sanitize_rmqctl_output(rabbitmqctl_cutoff))
333
334
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):
    '''
    Verify rabbit cluster and its alarms.

    Runs ``health_checks.rabbitmq_cmd cluster_status`` on the target. A
    minion fails when its running nodes differ from the configured disc
    nodes, or when any running node reports a non-empty alarm list.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='health_checks.rabbitmq_cmd',
                                   arg=['cluster_status'],
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    running_nodes = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # sorted() returns the ordered lists; list.sort() returns None, so
        # comparing the results of two .sort() calls is always True.
        if sorted(running_nodes) != sorted(available_nodes):
            failed_minions.append(minion)
            continue

        nodes_alarms = [el[node]
                        for node in running_nodes
                        for el in alarms
                        if node in el and len(el[node]) > 0]
        if nodes_alarms:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        # Running nodes as reported by the last minion processed.
        logger.info(running_nodes)
    return True
385
386
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):
    '''
    JSON formatted haproxy status.

    Sends ``show stat`` to the haproxy admin socket and parses the CSV
    answer.

    :param socket_path: path of the haproxy admin unix socket
    :param buff_size: number of bytes requested per ``recv`` call
    :param encoding: encoding of the command and of the answer
    :param stats_filter: when non-empty, only these columns are kept
                         (the list is only read, never modified)
    :return: ``{pxname: {'FRONTEND': {...}, 'BACKEND': {...},
             'UPSTREAM': {svname: {...}}}}``, or False (retcode set to 2)
             when the socket does not exist
    '''

    stat_cmd = 'show stat\n'

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
    chunks = []
    try:
        client.connect(socket_path)
        client.sendall(bytearray(stat_cmd, encoding))
        output = client.recv(buff_size)
        while output:
            chunks.append(output)
            output = client.recv(buff_size)
    finally:
        # Close the socket even when connect/send/recv raise.
        client.close()

    # Decode once: a multibyte character may span two recv() chunks.
    res = b"".join(chunks).decode(encoding)

    haproxy_stats = {}
    res_list = res.split('\n')
    # The header line starts with "# ".
    fields = res_list[0][2:].split(',')

    for line in res_list[1:]:
        if not line.strip():
            continue
        element = dict(zip(fields, line.split(',')))
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            element = dict((key, value) for key, value in element.items()
                           if key in stats_filter)
        proxy = haproxy_stats.setdefault(server_name, {})
        if server_type in ("FRONTEND", "BACKEND"):
            proxy[server_type] = element
        else:
            proxy.setdefault('UPSTREAM', {})[server_type] = element

    return haproxy_stats
441
442
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):
    '''
    Verify haproxy backends status.

    Runs ``health_checks.haproxy_status`` on the target. A minion fails
    when, for a service not listed in ``ignore_services``:

    * its FRONTEND status (if present) is not ``OPEN``,
    * its BACKEND status (if present) is not ``UP``,
    * an upstream not listed in ``ignore_upstreams`` is not ``UP``,
    * it has no upstream at all and ``ignore_no_upstream`` is False.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "haproxy status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='health_checks.haproxy_status',
                                   arg=["stats_filter=['status']"],
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        haproxy_json = out[minion]['ret']
        healthy = True
        for service in haproxy_json:
            if service in ignore_services:
                continue
            entry = haproxy_json[service]
            # A proxy may have only a FRONTEND or only a BACKEND row;
            # check whichever is reported instead of raising KeyError.
            if 'FRONTEND' in entry and entry['FRONTEND']['status'] != 'OPEN':
                healthy = False
            if 'BACKEND' in entry and entry['BACKEND']['status'] != 'UP':
                healthy = False
            if 'UPSTREAM' in entry:
                for upstream in entry['UPSTREAM']:
                    if upstream in ignore_upstreams:
                        continue
                    if entry['UPSTREAM'][upstream]['status'] != 'UP':
                        healthy = False
            elif not ignore_no_upstream:
                healthy = False
        if not healthy:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
490
491
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):
    '''
    Verify storage space/inodes status.

    :param verify: ``space`` (runs ``disk.usage``, reads ``capacity``) or
                   ``inodes`` (runs ``disk.inodeusage``, reads ``use``)
    :param space_limit: maximum used space percentage
    :param inode_limit: maximum used inodes percentage
    :param ignore_partitions: entries of the returned data to skip
    :return: True when no minion exceeds the limit, False otherwise
             (retcode set to 2)
    '''

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    modes = {
        'space': ('disk.usage', 'capacity', space_limit),
        'inodes': ('disk.inodeusage', 'use', inode_limit),
    }
    fun_cmd, json_arg, limit = modes[verify]

    agent = "df status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun=fun_cmd,
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limit = int(limit)
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        df_json = out[minion]['ret']
        for disk in df_json:
            if disk in ignore_partitions:
                continue
            try:
                used = int(str(df_json[disk][json_arg]).rstrip('%'))
            except ValueError:
                # No numeric usage reported (e.g. "-"): nothing to compare.
                continue
            if used > limit and minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
541
542
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):
    '''
    Verify load average status.

    Runs ``status.loadavg`` on the target; a minion fails when its 1, 5 or
    15 minute value is greater than ``la1``, ``la5`` or ``la15``.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "load average status"
    request = dict(tgt=target, tgt_type=target_type, fun='status.loadavg', timeout=3)
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    thresholds = (('1-min', la1), ('5-min', la5), ('15-min', la15))
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        la_json = out[minion]['ret']
        # List (not generator): all three values are always evaluated.
        exceeded = [float(la_json[key]) > float(limit) for key, limit in thresholds]
        if any(exceeded) and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
580
581
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):
    '''
    Verify netdev rx/tx drop status.

    Runs ``status.netdev`` on the target and collects, per minion and
    device, the ``rx_drop`` / ``tx_drop`` counters that are greater than
    their limit.

    :return: True when no counter exceeds its limit, False otherwise
             (retcode set to 2)
    '''

    agent = "netdev rx/tx status"
    request = dict(tgt=target, tgt_type=target_type, fun='status.netdev', timeout=3)
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limits = (('rx_drop', rx_drop_limit), ('tx_drop', tx_drop_limit))
    # {minion: {device: {counter: value}}}
    failed_minions = {}
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        dev_json = out[minion]['ret']
        for netdev in dev_json:
            if netdev in ignore_devices:
                continue
            counters = dev_json[netdev]
            for counter, limit in limits:
                if int(counters[counter]) > int(limit):
                    per_device = failed_minions.setdefault(minion, {})
                    per_device.setdefault(netdev, {})[counter] = int(counters[counter])

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
624
625
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):
    '''
    Verify available memory status.

    Runs ``status.meminfo`` on the target and computes the used percentage
    from ``MemTotal`` and ``MemAvailable``.

    :param used_limit: maximum used memory percentage
    :return: True when no minion exceeds the limit, False otherwise
             (retcode set to 2)
    '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='status.meminfo',
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limit = float(used_limit)
    failed_minions = []
    verified_minions = []
    for minion in out:
        meminfo = out[minion]['ret']
        mem_avail = int(meminfo['MemAvailable']['value'])
        mem_total = int(meminfo['MemTotal']['value'])
        # 100.0 forces float division; integer division on Python 2 would
        # truncate e.g. 80.9 to 80 and hide a limit violation.
        used_pct = (mem_total - mem_avail) * 100.0 / mem_total
        if used_pct > limit:
            failed_minions.append(minion)
        else:
            verified_minions.append({minion: str(used_pct) + '%'})

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
660
661
def ntp_status(params = ['-4', '-p', '-n']):
    '''
    JSON formatted ntpq command output.

    Runs ``ntpq`` with ``params`` and parses the peers table: first line
    is the header, second line is skipped, every following non-empty line
    is a peer.

    :param params: arguments passed to ``ntpq`` (only read, never modified)
    :return: dict mapping peer (``remote`` column without its leading
             state character) to its columns, plus ``current`` (True for
             the ``*`` peer) and ``state`` (indicator and comment). ``st``
             and ``t`` are exposed as ``stratum`` and ``source_type``.
             An empty dict is returned when no header with these columns
             is found.
    '''

    state_comments = {
        '#': 'source selected, distance exceeds maximum value',
        'o': 'source selected, Pulse Per Second (PPS) used',
        '+': 'source selected, included in final set',
        'x': 'source false ticker',
        '.': 'source selected from end of candidate list',
        '-': 'source discarded by cluster algorithm',
        '*': 'current time source',
        ' ': 'source discarded high stratum, failed sanity',
    }
    source_types = {
        'l': "local (such as a GPS, WWVB)",
        'u': "unicast (most common)",
        'm': "multicast",
        'b': "broadcast",
        '-': "netaddr",
    }
    # Suffixes ntpq uses for long intervals, converted to seconds.
    unit_seconds = {'m': 60, 'h': 3600, 'd': 86400}

    # universal_newlines makes communicate() return text on Python 3 too.
    proc = subprocess.Popen(['ntpq'] + params,
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    ntp_lines = stdout.split('\n')
    fields = ntp_lines[0].split()
    if 'st' not in fields or 't' not in fields:
        # No peers table (e.g. ntpq could not reach ntpd).
        return {}
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    for line in ntp_lines[2:]:
        if not line.strip():
            continue
        element = {}
        for field, value in zip(fields, line.split()):
            if field == 'source_type':
                element[field] = {
                    'indicator': value,
                    # Unknown type letters must not abort the parsing.
                    'comment': source_types.get(value, 'unknown'),
                }
            elif field in ('stratum', 'when', 'poll', 'reach'):
                if value == '-':
                    element[field] = -1
                elif value[-1] in unit_seconds:
                    element[field] = int(value[:-1]) * unit_seconds[value[-1]]
                else:
                    element[field] = int(value)
            elif field in ('delay', 'offset', 'jitter'):
                element[field] = float(value)
            else:
                element[field] = value

        peer = element.pop('remote')
        peer_state = peer[0]
        if peer_state in state_comments:
            peer = peer[1:]
        else:
            # No state character in front of the peer: reported as 'f'
            # with the comment of the blank state.
            peer_state = 'f'
        element['current'] = peer_state == '*'
        if peer_state == 'f':
            element['state'] = {'indicator': 'f', 'comment': state_comments[' ']}
        else:
            element['state'] = {'indicator': peer_state,
                                'comment': state_comments[peer_state]}
        ntp_peers[peer] = element

    return ntp_peers
731
732
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):
    '''
    Verify NTP peers status.

    Runs ``health_checks.ntp_status`` on the target. A minion passes when
    at least ``min_peers`` of its peers have a stratum not greater than
    ``max_stratum``.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "ntpd peers status"
    request = dict(tgt=target, tgt_type=target_type, fun='health_checks.ntp_status', timeout=3)
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        ntp_json = out[minion]['ret']
        good_peers = [peer for peer in ntp_json
                      if ntp_json[peer]['stratum'] < int(max_stratum) + 1]
        bucket = verified_minions if len(good_peers) > int(min_peers) - 1 else failed_minions
        if minion not in bucket:
            bucket.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +0200771
772
def gluster_pool_list():
    '''
    JSON formatted GlusterFS pool list command output.

    Runs ``gluster pool list``. The first line is the three column header;
    every following line made of three whitespace separated columns is a
    peer.

    :return: dict mapping the ``uuid`` column to the remaining columns
             (keys are the lower-cased header names); an empty dict when
             the header cannot be parsed
    '''

    # universal_newlines makes communicate() return text on Python 3 too.
    proc = subprocess.Popen(['gluster', 'pool', 'list'],
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    regex = re.compile(r'^(\S+)\s+(\S+)\s+(\S+)$')
    lines = stdout.split('\n')

    header = regex.match(lines[0].strip())
    if not header:
        return {}
    fields = [name.lower() for name in header.groups()]

    pool = {}
    for line in lines[1:]:
        row = regex.match(line.strip())
        if not row:
            # Blank or unexpected line: skip instead of raising IndexError.
            continue
        peer = dict(zip(fields, row.groups()))
        uuid = peer.pop('uuid')
        pool[uuid] = peer

    return pool
795
796
def gluster_volume_status():
    '''
    JSON formatted GlusterFS volumes status command output.

    Runs ``gluster volume status all detail`` and parses it line by line:
    a ``Status of volume`` line opens a volume, a dashed line announces a
    brick, the following ``Brick`` line gives its host and path, and every
    other ``key : value`` line is attached to the last brick seen.

    :return: ``{volume: {'bricks': [{'host': ..., 'path': ..., <key>:
             <value>, ...}]}}`` with keys lower-cased and spaces replaced
             by underscores
    '''

    # universal_newlines makes communicate() return text on Python 3 too.
    proc = subprocess.Popen(['gluster', 'volume', 'status', 'all', 'detail'],
                            stdout=subprocess.PIPE, universal_newlines=True)
    stdout, _ = proc.communicate()

    volumes = {}
    bricks = None          # brick list of the volume being parsed
    brick_lookup = False   # True between a dashed line and its Brick line

    for line in stdout.split('\n'):
        if 'Status of volume' in line:
            volume_name = line.split(':')[1].strip()
            bricks = []
            volumes[volume_name] = {'bricks': bricks}
        elif not line.strip():
            continue
        elif '--------' in line:
            brick_lookup = True
        elif brick_lookup and line.split(':')[0].strip() == 'Brick':
            location = re.findall(r'^Brick\ *:\ (.*)', line)[0].split()[1]
            brick_host, brick_path = location.split(':', 1)
            bricks.append({'host': brick_host, 'path': brick_path})
            brick_lookup = False
        elif bricks and ':' in line:
            # Split on the first colon only: values may contain colons.
            brick_key, brick_value = line.split(':', 1)
            brick_key = brick_key.strip().lower().replace(' ', '_')
            bricks[-1][brick_key] = brick_value.strip()
        # Anything else (text before the first brick, lines without a
        # colon) carries no brick attribute and is ignored.

    return volumes
830
831
def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):
    '''
    Check GlusterFS peer status.

    Runs ``health_checks.gluster_pool_list`` on the target. A minion fails
    when one of its peers is not ``Connected`` or when it sees fewer than
    ``expected_size`` connected peers.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "glusterfs peer status"
    request = dict(tgt=target,
                   tgt_type=target_type,
                   fun='health_checks.gluster_pool_list',
                   timeout=3,
                   kwargs='[batch=True]')
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        alive_peers = [peer for peer in gluster_json
                       if gluster_json[peer]['state'] == 'Connected']
        has_disconnected = len(alive_peers) != len(gluster_json)
        too_few = len(alive_peers) < expected_size
        if (has_disconnected or too_few) and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
872
873
def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=[], ignore_dead=False, **kwargs):
    '''
    Check GlusterFS volumes status.

    Runs ``health_checks.gluster_volume_status`` on the target. A minion
    fails when, for a volume not listed in ``ignore_volumes``, the brick
    list is missing, holds fewer than ``expected_size`` bricks, or holds a
    brick whose ``online`` value is not ``Y``.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "glusterfs volumes status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='health_checks.gluster_volume_status',
                                   timeout=3,
                                   kwargs='[batch=True]'
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    expected = int(expected_size)
    failed_minions = []
    verified_minions = []
    verified_volumes = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        healthy = True
        for volume in gluster_json:
            if volume in ignore_volumes:
                continue
            verified_volumes.append(volume)
            bricks = gluster_json[volume].get('bricks')
            if bricks is None:
                # Nothing to inspect; indexing 'bricks' here would raise.
                healthy = False
                continue
            alive_bricks = sum(1 for brick in bricks if brick.get('online') == 'Y')
            if len(bricks) < expected or alive_bricks < len(bricks):
                healthy = False
        if not healthy:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Verified minions:")
        logger.info(verified_minions)
        logger.info("Verified volumes:")
        logger.info(verified_volumes)

    return True
930
931
def ceph_cmd(cmd):
    '''
    JSON formatted ceph command output.

    :param cmd: ceph sub-command; split on whitespace into arguments
    :return: output of ``ceph <cmd> --format json-pretty`` parsed as JSON
    '''

    argv = ['ceph']
    argv.extend(cmd.split())
    argv.extend(['--format', 'json-pretty'])

    proc = subprocess.Popen(argv, stdout=subprocess.PIPE)
    raw_output = proc.communicate()[0]

    return json.loads(raw_output)
940
941
def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):
    '''
    Check all ceph monitors health status.

    Runs ``health_checks.ceph_cmd status`` on the target. A minion fails
    when any of the following holds in its status document:

    * ``health.overall_status`` differs from ``expected_status``
    * the osdmap is flagged ``full`` or ``nearfull``
    * ``num_osds``, ``num_in_osds`` and ``num_up_osds`` are not all equal
    * ``quorum``, ``quorum_names`` and ``monmap.mons`` differ in length
    * a monitor listed in ``health.timechecks`` or in
      ``health.health.health_services`` is not ``expected_status``
    * a placement group state differs from ``expected_state``

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "ceph health status"
    request = dict(tgt=target, tgt_type=target_type, fun='health_checks.ceph_cmd', arg=['status'], timeout=3)
    out = __salt__['saltutil.cmd'](**request) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        ceph_json = out[minion]['ret']
        # The value is not used, but the key is still required to exist.
        ceph_json['fsid']

        # Every lookup below is performed for every minion, in this order.
        problems = []
        problems.append(ceph_json['health']['overall_status'] != expected_status)
        problems.append(bool(ceph_json['osdmap']['osdmap']['full']))
        problems.append(bool(ceph_json['osdmap']['osdmap']['nearfull']))

        num_osds = ceph_json['osdmap']['osdmap']['num_osds']
        num_in_osds = ceph_json['osdmap']['osdmap']['num_in_osds']
        num_up_osds = ceph_json['osdmap']['osdmap']['num_up_osds']
        problems.append(not (num_osds == num_in_osds == num_up_osds))

        quorum = len(ceph_json['quorum'])
        quorum_names = len(ceph_json['quorum_names'])
        mons = len(ceph_json['monmap']['mons'])
        problems.append(not (quorum == quorum_names == mons))

        problems.extend([mon['health'] != expected_status
                         for mon in ceph_json['health']['timechecks']['mons']])
        problems.extend([mon['health'] != expected_status
                         for srv in ceph_json['health']['health']['health_services']
                         for mon in srv['mons']])
        problems.extend([state['state_name'] != expected_state
                         for state in ceph_json['pgmap']['pgs_by_state']])

        if any(problems) and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Quorum:")
        # Quorum as reported by the last minion processed.
        logger.info(ceph_json['quorum_names'])
        logger.info("Verified minions:")
        logger.info(verified_minions)

    return True
1018
1019
def get_entropy():
    '''
    Retrieve entropy size for the host.

    :return: raw content of ``/proc/sys/kernel/random/entropy_avail``
             as a string
    '''

    entropy_file = '/proc/sys/kernel/random/entropy_avail'
    with open(entropy_file, 'r') as handle:
        return handle.read()
1027
1028
def entropy_check(target='*', target_type='glob', minimum_bits=700, ignore_dead=False, **kwargs):
    '''
    Check entropy size in cluster.

    Runs ``health_checks.get_entropy`` on the target; a minion fails when
    the value it returns is lower than ``minimum_bits``.

    :return: True when no minion failed, False otherwise (retcode set to 2)
    '''

    agent = "entropy size status"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='health_checks.get_entropy',
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # The raw job data used to be dumped with a leftover "print out"
    # statement; keep it available at debug level instead.
    logger.debug(out)

    minimum = int(minimum_bits)
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        if int(out[minion]['ret']) < minimum:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1063
1064
def docker_registry_list(host):

    ''' Retrieve and list docker catalog

    host   registry address; "http://" is prepended unless the value already
           starts with "http"

    Returns a dict mapping each repository name from <host>/v2/_catalog to
    the "tags" field of its <host>/v2/<repo>/tags/list reply. Any failure
    (connection, bad JSON, missing field, bad argument) yields an empty dict.
    '''

    try:
        base = host if host.startswith('http') else 'http://' + host
        url = base + '/v2/'
        catalog = requests.get(url + '_catalog').json()

        versions = {}
        for repo in catalog['repositories']:
            tag_reply = requests.get(url + repo + '/tags/list').json()
            versions[repo] = tag_reply.pop('tags')
        return versions
    except Exception:
        # Deliberately best-effort, but "except Exception" (unlike a bare
        # except) lets KeyboardInterrupt and SystemExit propagate.
        return {}
Dzmitry Stremkouski7cd10fc2019-04-17 11:51:59 +02001083
1084
def docker_ps(list_all=0):

    ''' List docker containers through the local docker socket

    list_all   forwarded as the "all" argument of the client's containers()
               call (per the Docker SDK docs a true value also lists
               non-running containers)

    Returns whatever containers() returns.
    '''

    import docker

    # docker-py 1.x exposes the low-level client as docker.client.Client;
    # the Docker SDK 2.0+ renamed it to docker.APIClient. Prefer the new name
    # and fall back to the old one so both generations work.
    client_cls = getattr(docker, 'APIClient', None)
    if client_cls is None:
        client_cls = docker.client.Client

    client = client_cls(base_url='unix://var/run/docker.sock')
    return client.containers(all=list_all)
1090
Dzmitry Stremkouski2c709f22019-04-22 02:27:54 +02001091
def zookeeper_cmd(cmd, hostname='localhost', port=2181, timeout=None):

    ''' Execute zookeeper cmd via socket

    cmd        command to send (text is encoded as UTF-8 before sending)
    hostname   host to connect to
    port       TCP port to connect to
    timeout    optional socket timeout in seconds; None keeps the socket's
               default blocking behaviour

    Sends the command, half-closes the connection and returns everything the
    peer sends back until it closes its side, as a native string.
    Socket errors propagate to the caller; the socket is always closed.
    '''

    buf_size = 1024
    if not isinstance(cmd, bytes):
        cmd = cmd.encode('utf-8')

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        if timeout is not None:
            sock.settimeout(timeout)
        sock.connect((hostname, port))
        sock.sendall(cmd)
        # Signal end of request so the server answers and closes
        sock.shutdown(socket.SHUT_WR)
        chunks = []
        while True:
            data = sock.recv(buf_size)
            # Empty read (str or bytes) means the peer closed the connection
            if not data:
                break
            chunks.append(data)
    finally:
        sock.close()

    rdata = b''.join(chunks)
    if not isinstance(rdata, str):
        # Python 3: recv() yields bytes, callers expect text
        rdata = rdata.decode('utf-8', 'replace')
    return rdata
1109
1110
def _zk_key(raw):

    ''' Normalize a field name: stripped, lower-cased, spaces turned into underscores '''

    return raw.strip().lower().replace(' ', '_')


def _zk_value(raw):

    ''' Convert a raw value to int when it consists of digits only, else return it stripped '''

    text = raw.strip()
    return int(text) if text.isdigit() else text


def _zk_number(raw):

    ''' Convert a latency figure to int, falling back to float for fractional values '''

    text = raw.strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def _zk_pairs(lines, sep):

    ''' Build a dict from "key<sep>value" lines; lines without the separator are skipped '''

    parsed = {}
    for line in lines:
        key, found, value = line.partition(sep)
        if found:
            parsed[_zk_key(key)] = _zk_value(value)
    return parsed


def _zk_version(line):

    ''' Parse the "Zookeeper version:" line into a list of ints, or the raw text when it has no dots '''

    version_str = line.split(':')[1].strip()
    version = version_str
    if '-' in version_str:
        # Drop the build suffix that follows the first dash
        version_str = version_str.split('-')[0]
    if '.' in version_str:
        version = [int(elem) for elem in version_str.split('.') if elem.strip().isdigit()]
    return version


def zookeeper_stats():

    ''' Retrieve zookeeper stats

    Issues the conf, envi, srvr and cons commands through zookeeper_cmd and
    returns a dict with four sections:

    configuration   key=value lines of "conf"
    environment     key=value lines of "envi" (first line skipped)
    server          key: value lines of "srvr"; "version" is parsed into a
                    list of ints and "latency" into a min/max/avg dict
    clients         one entry per "cons" line, keyed "[addr]:port", holding
                    "direction" (the bracketed number) plus the key=value
                    pairs found inside the parentheses

    Keys are lower-cased with spaces replaced by underscores; all-digit
    values become ints. Lines that do not fit the expected shape are skipped.
    '''

    stats = {}

    stats['configuration'] = _zk_pairs(zookeeper_cmd('conf').split('\n'), '=')

    stats['environment'] = _zk_pairs(zookeeper_cmd('envi').split('\n')[1:], '=')

    server = {}
    for line in zookeeper_cmd('srvr').split('\n'):
        if not line:
            continue
        if re.match(r'Zookeeper version:', line, flags=re.IGNORECASE):
            server['version'] = _zk_version(line)
            continue
        if re.match(r'Latency min/avg/max:', line, flags=re.IGNORECASE):
            latency_min, latency_avg, latency_max = line.split(':')[1].strip().split('/')
            # NOTE(review): some ZooKeeper releases appear to report a
            # fractional average; _zk_number accepts both forms - confirm.
            server['latency'] = {'min': _zk_number(latency_min),
                                 'max': _zk_number(latency_max),
                                 'avg': _zk_number(latency_avg)}
            continue
        key, found, value = line.partition(':')
        if found:
            server[_zk_key(key)] = _zk_value(value)
    stats['server'] = server

    # " /<addr>:<port>[<n>](k=v,k=v,...)"; the greedy address group keeps
    # everything up to the last ":<port>[" so addresses may contain colons.
    cons_line = re.compile(r'^\s*/(.+):(\d+)\[(\d+)\]\((.+)\)$')
    clients = {}
    for line in zookeeper_cmd('cons').split('\n'):
        match = cons_line.match(line)
        if not match:
            continue
        addr, port, direction, details = match.groups()
        entry = {'direction': int(direction)}
        for elem in details.split(','):
            key, found, value = elem.partition('=')
            if found:
                entry[_zk_key(key)] = _zk_value(value)
        clients['[' + addr + ']:' + port] = entry
    stats['clients'] = clients

    return stats
1180
1181
def get_zookeeper_leader(target='I@opencontrail:control', target_type='compound', ignore_dead=False, **kwargs):

    ''' Retrieve zookeeper leader

    Runs health_checks.zookeeper_stats on the targeted minions and returns
    the id of the minion whose server mode is "leader" (the last one seen if
    several report it), or None when no minion does. Returns False and sets
    __context__['retcode'] to 2 when the minion output does not validate.
    '''

    check_name = "zookeeper leader retrieve"
    replies = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='health_checks.zookeeper_stats',
        timeout=3,
    ) or None

    if not _minions_output(replies, check_name, ignore_dead):
        __context__['retcode'] = 2
        return False

    leaders = [
        minion_id
        for minion_id in replies
        if replies[minion_id]['ret']['server']['mode'] == 'leader'
    ]
    return leaders[-1] if leaders else None
1205
1206
def contrail_vrouter_list(api_host='127.0.0.1', api_port=9100):

    ''' Retrieve and list contrail vrouters.
        Valid targets: Contrail controllers.

    api_host   API address; "http://" is prepended unless the value already
               starts with "http"
    api_port   API port

    Returns a list of {'name': ..., 'uuid': ...} dicts built from the
    /virtual-routers reply. The name is the last fq_name component other
    than "default-global-system-config", or None when there is none.
    On any failure an empty dict is returned (kept for compatibility).
    '''

    try:
        if api_host.startswith('http'):
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)

        vrouters = requests.get(url + '/virtual-routers').json()
        vrouter_list = []
        for vr in vrouters['virtual-routers']:
            vr_uuid = vr['uuid']
            # Reset for every router so a name can never leak over from the
            # previous entry when this fq_name has no usable component.
            vr_name = None
            for name in vr['fq_name']:
                if name != "default-global-system-config":
                    vr_name = name
            vrouter_list.append({'name': vr_name, 'uuid': vr_uuid})
        return vrouter_list

    except Exception:
        # Best-effort lookup; KeyboardInterrupt/SystemExit still propagate
        return {}
1233
1234
def contrail_vrouter_show(vr_uuid, api_host='127.0.0.1', api_port=9100):

    ''' Retrieve contrail vrouter data
        Valid targets: Contrail controllers.

    vr_uuid    uuid appended to the /virtual-router/ path
    api_host   API address; "http://" is prepended unless the value already
               starts with "http"
    api_port   API port

    Returns the decoded JSON reply, or an empty dict on any failure.
    '''

    try:
        scheme = '' if api_host.startswith('http') else 'http://'
        url = scheme + api_host + ':' + str(api_port)
        return requests.get(url + '/virtual-router/' + vr_uuid).json()

    except Exception:
        # Best-effort lookup; KeyboardInterrupt/SystemExit still propagate
        return {}
1251
1252
def _xmletree_descend_child(given_child, tag_requested):

    ''' Return the first direct subelement of given_child whose tag equals
        tag_requested, or an empty dict when there is none '''

    candidates = (node for node in given_child if node.tag == tag_requested)
    return next(candidates, {})
1265
1266
def contrail_vrouter_agent_status(api_host='127.0.0.1', api_port=8085):

    ''' Retrieve contrail vrouter agent status

    api_host   agent address; "http://" is prepended unless the value
               already starts with "http"
    api_port   agent introspection port

    Fetches /Snh_SandeshUVECacheReq?x=NodeStatus and returns a dict with:

    state         text of the first ProcessStatus "state" element
    connections   list of dicts (type, name, status, description,
                  server_addrs as [{'host': ..., 'port': ...}])

    On failure a descriptive string is returned instead of a dict: when the
    request cannot be made, when the status code is not 200, when the reply
    is not parseable XML, or when the XML lacks the expected structure.
    '''

    import xml.etree.ElementTree as ET

    if api_host.startswith('http'):
        url = api_host + ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    try:
        req = requests.get(url + '/Snh_SandeshUVECacheReq?x=NodeStatus')
    except Exception as exc:
        # Without a reply there is nothing to parse; report it here rather
        # than falling through with "req" undefined.
        return "Could not fetch data from vrouter agent via %s.\n%s" % (url, str(exc))

    if int(req.status_code) != 200:
        return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    try:
        xmletree = ET.fromstring(req.text)
    except Exception:
        return "Could not parse xml tree %s" % str(req.text)

    try:
        vrouter_data = {}
        # Walk down to the first ProcessStatus element
        child = xmletree
        for tag in ('NodeStatusUVE', 'data', 'NodeStatus', 'process_status', 'list', 'ProcessStatus'):
            child = _xmletree_descend_child(child, tag)
        vrouter_data['state'] = _xmletree_descend_child(child, 'state').text
        vrouter_data['connections'] = []
        child = _xmletree_descend_child(child, 'connection_infos')
        for elem in _xmletree_descend_child(child, 'list'):
            conn = {}
            for field in ('type', 'name', 'status', 'description'):
                conn[field] = _xmletree_descend_child(elem, field).text
            conn['server_addrs'] = []
            server_addrs = _xmletree_descend_child(elem, 'server_addrs')
            for srv in _xmletree_descend_child(server_addrs, 'list'):
                # Split on the last colon so the host part may contain colons
                host, port = srv.text.rsplit(':', 1)
                conn['server_addrs'].append({'host': host, 'port': port})
            vrouter_data['connections'].append(conn)
        return vrouter_data
    except Exception:
        # A missing element surfaces as an attribute/type error on the {}
        # placeholder returned by _xmletree_descend_child
        return "Unsupported xml tree for this function %s" % str(req.text)
1316
1317
def libvirt_capabilities():

    ''' List libvirt machine types per guest architecture

    Runs "virsh capabilities" and returns a dict mapping each guest arch
    name to the list of its machine names, leaving out machine entries that
    carry a "canonical" attribute.

    On failure a descriptive string is returned instead of a dict: when
    virsh cannot be executed, when its output is not parseable XML, or when
    the XML lacks the expected structure.
    '''

    import xml.etree.ElementTree as ET

    try:
        proc = subprocess.Popen(['virsh', 'capabilities'], stdout=subprocess.PIPE)
        stdout = proc.communicate()[0]
    except OSError as exc:
        # virsh missing or not executable: there is no output to report
        return "Could not execute virsh capabilities: %s" % str(exc)

    try:
        xmletree = ET.fromstring(stdout)
    except Exception:
        return "Could not parse xml tree %s" % str(stdout)

    try:
        capabilities = {}
        for guest in xmletree.findall('guest'):
            for arch in guest.findall('arch'):
                _name = arch.attrib['name']
                capabilities[_name] = [
                    machine.text
                    for machine in arch.findall('machine')
                    if 'canonical' not in machine.attrib
                ]

        return capabilities
    except Exception:
        return "Unsupported xml tree for this function %s" % str(stdout)
1347