blob: 9428fd5639aa8b063e41b1a8139a8a5002febe06 [file] [log] [blame]
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02001import datetime
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02002import hashlib
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02003import json
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +02004import logging
5import os
6import re
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02007import requests
8import salt.utils
9import socket
10import subprocess
Dzmitry Stremkouski36290202019-05-05 21:26:25 +020011import yaml
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +020012
__author__ = "Dzmitry Stremkouski"
__copyright__ = "Copyright 2019, Mirantis Inc."
__license__ = "Apache 2.0"

# Module-level logger with an extra stream handler so check results are
# also printed when the module is run from the master CLI.
logger = logging.getLogger(__name__)
stream = logging.StreamHandler()
logger.addHandler(stream)

# Prefer the C-accelerated libyaml loader/dumper when available; fall back
# to the pure-python implementation otherwise.
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper
25
# Default map of UVE structures to the attribute paths to extract for a
# vrouter. Paths are ':'-separated; '*' presumably matches any key at that
# level (TODO confirm against the consumer of this map, which is outside
# this chunk).
default_vrouter_info_map = yaml.load("""
ContrailConfig:
- deleted
- elements:uuid
- elements:virtual_router_dpdk_enabled
- elements:virtual_router_type
VrouterAgent:
- build_info:build-info:0:build-version
- build_info:build-info:0:build-number
- config_file
- control_ip
- control_node_list_cfg
- dns_server_list_cfg
- dns_servers
- down_interface_count
- eth_name
- headless_mode_cfg
- hostname_cfg
- hypervisor
- mode
- phy_if
- platform
- self_ip_list
- total_interface_count
- tunnel_type
- vhost_cfg
- vhost_if
- vr_limits:max_interfaces
- vr_limits:max_labels
- vr_limits:max_mirror_entries
- vr_limits:max_nexthops
- vr_limits:max_vrfs
- vr_limits:vrouter_max_bridge_entries
- vr_limits:vrouter_max_flow_entries
- vr_limits:vrouter_max_oflow_bridge_entries
- vr_limits:vrouter_max_oflow_entries
- xmpp_peer_list:*:ip
- xmpp_peer_list:*:primary
- xmpp_peer_list:*:status
""", Loader=Loader)

# Default attribute filter applied to peer structures.
default_peer_filter = ["encoding", "peer_address", "state"]
68
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +020069
def _failed_minions(out, agent, failed_minions):

    ''' Report minions that failed the given agent check.

        Logs every failed minion (with its raw return at debug level),
        sets the module retcode to 2 and returns False; returns True when
        there are no failures.
    '''

    if not failed_minions:
        return True

    logger.error("%s check FAILED" % agent)
    logger.error("Some minions returned non-zero exit code or empty data")
    logger.error("Failed minions:" + str(failed_minions))
    for failed in failed_minions:
        logger.error(failed)
        logger.debug(str(out[failed]['ret']))
    __context__['retcode'] = 2
    return False
85
86
def _minions_output(out, agent, ignore_dead, ignore_empty=False):

    ''' Verify minions output and exit code.

        Fails the check when the master returned nothing, when some targeted
        minions are offline (unless ignore_dead), or when any minion returned
        a non-zero retcode or an empty/boolean payload (unless ignore_empty).
    '''

    if not out:
        logger.error("%s check FAILED" % agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # Any entry carries the jid of the whole job.
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']( 'jobs.print_job', arg=[jid] ) or None
        if not job_stats:
            logger.error("%s check FAILED" % agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED" % agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        data = out[minion]
        # A minion fails when it has no retcode, a non-zero retcode, or
        # (unless ignore_empty) a boolean or empty return payload.
        if 'retcode' not in data or data['retcode'] != 0:
            failed = True
        elif not ignore_empty and (isinstance(data['ret'], bool) or len(data['ret']) == 0):
            failed = True
        else:
            failed = False
        if failed and minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    return True
138
139
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):

    ''' Verify minions are online by pinging them via the master. '''

    agent = "Minions"
    ping_result = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='test.ping',
        timeout=wait_timeout,
        gather_job_timeout=gather_job_wait_timeout
    ) or None

    # test.ping returns booleans, so empty payloads are expected here.
    return _minions_output(ping_result, agent, ignore_dead, ignore_empty=True)
153
154
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify wall-clock skew between servers stays within time_diff seconds. '''

    agent = "Time diff"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.time',
        arg=['%s'],   # epoch seconds format
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    minions_times = {}
    env_times = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        if out[minion]['retcode'] == 0:
            minion_time = int(out[minion]['ret'])
            minions_times.setdefault(str(minion_time), []).append(minion)
            env_times.append(minion_time)

    env_times.sort()
    diff = env_times[-1] - env_times[0]

    if diff > time_diff:
        __context__['retcode'] = 2
        if kwargs.get("debug", False):
            # Expose the per-timestamp grouping to help find the outlier.
            return False, minions_times
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
197
198
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +0200199def contrail_process_list(**kwargs):
200
201 ''' Retrieve contrail process pids and start_time '''
202
203 cmd = ['contrail-status', '-d']
204
205 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
206 stdout, stderr = proc.communicate()
207
208 procs = {}
209 for line in stdout.split('\n'):
210 if re.findall('^(\S+).*pid ([0-9]+),.*$', line):
211 stat = line.split()
212 procs[stat[0]] = int(stat[3][:-1])
213
214 if kwargs.get('role', 'compute') == 'controller':
215
216 for service in ['zookeeper', 'ifmap-server']:
217 cmd = ['service', service, 'status']
218
219 proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
220 stdout, stderr = proc.communicate()
221
222 for line in stdout.split('\n'):
223 if re.findall('^(\S+).*process ([0-9]+)$', line):
224 stat = line.split()
225 procs[stat[0]] = int(stat[3])
226
227 ctime = int(datetime.datetime.now().strftime("%s"))
228 btime_re = re.compile(r"^btime (\d+)$", re.MULTILINE)
229 btime_groups = btime_re.search(open("/proc/stat").read())
230 btime = int(btime_groups.groups()[0])
231 clk_tck = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
232 for proc in procs:
233 pid = procs[proc]
234 try:
235 with open('/proc/%s/stat' % str(pid), 'r') as f:
236 stat = f.read()
237 jitty_time = int(stat.split(') ')[1].split()[19]) / clk_tck
238 proc_uptime = ctime - btime - int(jitty_time)
239 except:
240 proc_uptime = 0
241 procs[proc] = { 'pid': pid, 'uptime': proc_uptime }
242
243 return procs
244
245
def contrail_check(target='I@opencontrail:control or I@opencontrail:collector or I@opencontrail:compute', nodetool_target='I@opencontrail:control or I@opencontrail:collector', compute_target='I@opencontrail:compute', target_type='compound', nodetool_target_type='compound', compute_target_type='compound', nodetool_expected_size=3, proc_min_uptime=30, ignore_dead=False, **kwargs):

    ''' Verify contrail status returns nothing critical.

        Runs four sequential checks and fails fast on the first broken one:
        1. `contrail-status` on all contrail nodes: every output line must be
           a header, blank, or an active/backup/disabled-on-boot service.
        2. `nodetool status` on control/collector nodes: exactly
           nodetool_expected_size Cassandra nodes must be Up/Normal ("UN").
        3. contrail process uptimes on control/collector nodes.
        4. contrail process uptimes on compute nodes.
        A process restarted less than proc_min_uptime seconds ago fails
        checks 3/4 (flap detection). Sets retcode 2 and returns False on any
        failure; True otherwise.
    '''

    agent = "Contrail status"

    # Check #1 contrail-status
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='cmd.run',
                                    arg=['contrail-status'],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # Accept section headers (==), empty lines, and services in an
    # acceptable state; anything else marks the minion as failed.
    pattern = '^(==|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        for line in out[minion]['ret'].split('\n'):
            if not prog.match(line) and minion not in failed_minions:
                failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # Check #2 nodetool
    out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                    tgt_type=nodetool_target_type,
                                    fun='cmd.run',
                                    arg=['nodetool status'],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # "UN" = Up/Normal rows in nodetool status output.
    pattern = '^UN'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        size = 0
        for line in out[minion]['ret'].split('\n'):
            if prog.match(line):
                size += 1
        if not size == nodetool_expected_size and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # Check #3 process status control
    out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                    tgt_type=nodetool_target_type,
                                    fun='health_checks.contrail_process_list',
                                    arg=['role=controller'],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        procs = out[minion]['ret']
        for proc in procs:
            proc_uptime = procs[proc]['uptime']
            if proc_uptime < proc_min_uptime:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # Check #4 process status computes
    out = __salt__['saltutil.cmd']( tgt=compute_target,
                                    tgt_type=compute_target_type,
                                    fun='health_checks.contrail_process_list',
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        procs = out[minion]['ret']
        for proc in procs:
            proc_uptime = procs[proc]['uptime']
            if proc_uptime < proc_min_uptime:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    return True
373
374
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify galera cluster size and state.

        Each member must report wsrep_cluster_size == cluster_size and an
        OPERATIONAL wsrep_evs_state.
    '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='mysql.status',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    for minion in out:
        status = out[minion]['ret']
        size_ok = int(status['wsrep_cluster_size']) == int(cluster_size)
        state_ok = status['wsrep_evs_state'] == 'OPERATIONAL'
        if not (size_ok and state_ok) and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
        logger.info("Cluster size: " + str(out[validated[0]]['ret']['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(out[validated[0]]['ret']['wsrep_evs_state']))
    return True
409
410
411def _quote_str(s, l=False, r=False):
412
413 ''' Quting rabbitmq erl objects for json import '''
414
415 if len(s) > 0:
416 if l:
417 s = s.lstrip()
418 if r:
419 s = s.rstrip()
420 if (s[0] == "'") and (s[-1] != "'") and r and not l:
421 s += "'"
422 if (s[0] == '"') and (s[-1] != '"') and r and not l:
423 s += '"'
424 if (s[-1] == "'") and (s[0] != "'") and l and not r:
425 s = "'" + s
426 if (s[-1] == '"') and (s[0] != '"') and l and not r:
427 s = '"' + s
428 if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
429 s = '"' + s.replace('"', '\\\"') + '"'
430 else:
431 if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
432 s= s.replace('"', '\\\"')
433 return s.replace("'", '"')
434 else:
435 return s
436
437
def _sanitize_rmqctl_output(string):

    ''' Sanitize rabbitmq erl terms into a JSON-parsable string.

        Splits the erl dump on commas, strips structural brackets from each
        chunk, quotes the bare token via _quote_str, and re-joins the pieces
        with ':' after a tuple opener or ',' otherwise. A final pass escapes
        stray quotes inside already-quoted values.
    '''

    rabbitctl_json = ""
    for chunk in string.split(','):
        token = chunk
        prefix = ""
        suffix = ""
        body = token
        lpar = False
        rpar = False
        if re.search('([\[\{\s]+)(.*)', token):
            body = re.sub('^([\[\{\s]+)', '', token)
            prefix = token[:-len(body)]
            token = body
            lpar = True
        if re.search('(.*)([\]\}\s]+)$', token):
            body = re.sub('([\]\}\s]+)$', '', token)
            suffix = token[len(body):]
            token = body
            rpar = True
        piece = prefix + _quote_str(body, l=lpar, r=rpar) + suffix
        # A key directly after '{' becomes "key": ..., everything else is
        # a plain comma-separated element.
        if lpar and not rpar and prefix.strip() and prefix.strip()[-1] == '{':
            piece += ":"
        else:
            piece += ","
        rabbitctl_json += piece

    # Drop the trailing separator appended to the last piece.
    rabbitctl_json = rabbitctl_json[:-1]
    new_rabbitctl_json = rabbitctl_json
    for s in re.findall('"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in s[1:][:-1]:
            orig = s
            changed = '"' + s.replace('\\', '\\\\').replace('"', '\\\"') + '"'
            new_rabbitctl_json = new_rabbitctl_json.replace(orig, changed)
    return new_rabbitctl_json
475
476
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +0200477def rabbitmq_list_queues(vhost='/'):
478
479 ''' JSON formatted RabbitMQ queues list '''
480
481 proc = subprocess.Popen(['rabbitmqctl', 'list_queues' , '-p', vhost], stdout=subprocess.PIPE)
482 stdout, stderr = proc.communicate()
483
484 queues = {}
485 for line in stdout.split('\n'):
486 if re.findall('[0-9]$', line):
487 queue_name, num = re.sub(r"\s+", " ", line).split()
488 queues[queue_name] = int(num)
489
490 return queues
491
492
def rabbitmq_list_vhosts():

    ''' JSON formatted RabbitMQ vhosts list '''

    proc = subprocess.Popen(['rabbitmqctl', 'list_vhosts'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    # vhost names always start with '/', which filters out header lines.
    return [line for line in stdout.split('\n') if re.findall('^/', line)]
506
507
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +0200508def rabbitmq_cmd(cmd):
509
510 ''' JSON formatted RabbitMQ command output '''
511
512 supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
513 if cmd not in supported_commands:
514 logger.error("Command is not supported yet, sorry")
515 logger.error("Supported commands are: " + str(supported_commands))
516 __context__['retcode'] = 2
517 return False
518
519 proc = subprocess.Popen(['rabbitmqctl', cmd], stdout=subprocess.PIPE)
520 stdout, stderr = proc.communicate()
521
522 rabbitmqctl_cutoff = stdout[int(stdout.find('[')):int(stdout.rfind(']'))+1].replace('\n','')
523 return json.loads(_sanitize_rmqctl_output(rabbitmqctl_cutoff))
524
525
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify rabbit cluster and it's alarms.

        A minion fails when the set of running nodes differs from the set of
        configured disc nodes, or when any running node reports alarms.
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.rabbitmq_cmd',
                                    arg=['cluster_status'],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # BUG FIX: the previous code compared running_nodes.sort() with
        # available_nodes.sort(); list.sort() returns None, so the test was
        # always True and a partitioned cluster was never detected. Compare
        # sorted copies instead.
        if sorted(running_nodes) == sorted(available_nodes):
            nodes_alarms = []
            for node in running_nodes:
                for el in alarms:
                    if node in el:
                        if len(el[node]) > 0:
                            nodes_alarms.append(el[node])
            if len(nodes_alarms) > 0:
                failed_minions.append(minion)
        else:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        # NOTE: reports the running_nodes of the last minion iterated.
        logger.info(running_nodes)
    return True
576
577
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size = 8192, encoding = 'UTF-8', stats_filter=[]):

    ''' JSON formatted haproxy status.

        Sends "show stat" to the haproxy admin socket and returns
        { proxy: { 'FRONTEND': {...}, 'BACKEND': {...},
                   'UPSTREAM': { server: {...} } } }.
        stats_filter limits each element to the listed stat field names
        (it is only read, never mutated). Returns False with retcode 2 when
        the socket is missing.
    '''

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket( socket.AF_UNIX, socket.SOCK_STREAM)
    client.connect(socket_path)
    # Single assignment (was duplicated before and after the socket check).
    stat_cmd = 'show stat\n'
    client.send(bytearray(stat_cmd, encoding))

    res = ""
    output = client.recv(buff_size)
    while output:
        res += output.decode(encoding)
        output = client.recv(buff_size)
    client.close()

    haproxy_stats = {}
    res_list = res.split('\n')
    # Header line starts with "# "; strip it to get the CSV field names.
    fields = res_list[0][2:].split(',')
    stats_list = []
    for line in res_list[1:]:
        if len(line.strip()) > 0:
            stats_list.append(line)

    for row in stats_list:
        # Split each row once (previously re-split per field: O(fields^2)).
        values = row.split(',')
        element = {}
        for idx, field in enumerate(fields):
            element[field] = values[idx]
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            filtered_element = element.copy()
            for el in element:
                if el not in stats_filter:
                    filtered_element.pop(el)
            element = filtered_element
        if server_name not in haproxy_stats:
            haproxy_stats[server_name] = {}
        if server_type == "FRONTEND" or server_type == "BACKEND":
            haproxy_stats[server_name][server_type] = element
        else:
            if 'UPSTREAM' not in haproxy_stats[server_name]:
                haproxy_stats[server_name]['UPSTREAM'] = {}
            haproxy_stats[server_name]['UPSTREAM'][server_type] = element

    return haproxy_stats
632
633
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=[], ignore_upstreams=[], ignore_no_upstream=False, **kwargs):

    ''' Verify haproxy backends status.

        A minion fails when any non-ignored service has a frontend that is
        not OPEN, a backend that is not UP, an upstream server that is not
        UP, or (unless ignore_no_upstream) no upstream section at all.
    '''

    agent = "haproxy status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.haproxy_status',
                                    arg=["stats_filter=['status']"],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []

    def _mark_failed(minion):
        if minion not in failed_minions:
            failed_minions.append(minion)

    for minion in out:
        verified_minions.append(minion)
        haproxy_json = out[minion]['ret']
        for service, stats in haproxy_json.items():
            if service in ignore_services:
                continue
            if stats['FRONTEND']['status'] != 'OPEN':
                _mark_failed(minion)
            if stats['BACKEND']['status'] != 'UP':
                _mark_failed(minion)
            if 'UPSTREAM' in stats:
                for upstream, upstream_stats in stats['UPSTREAM'].items():
                    if upstream not in ignore_upstreams and upstream_stats['status'] != 'UP':
                        _mark_failed(minion)
            elif not ignore_no_upstream:
                _mark_failed(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
681
682
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=[], **kwargs):

    ''' Verify storage space/inodes status.

        verify='space' checks disk.usage capacity; verify='inodes' checks
        disk.inodeusage use. A minion fails when any non-ignored partition
        exceeds the corresponding percentage limit.
    '''

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    if verify == 'space':
        fun_cmd, json_arg, limit = 'disk.usage', 'capacity', space_limit
    else:
        fun_cmd, json_arg, limit = 'disk.inodeusage', 'use', inode_limit

    agent = "df status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun=fun_cmd,
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        df_json = out[minion]['ret']
        for disk, disk_stats in df_json.items():
            if disk in ignore_partitions:
                continue
            # Values look like "42%": strip the trailing percent sign.
            if int(disk_stats[json_arg][:-1]) > int(limit) and minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
732
733
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):

    ''' Verify load average status.

        A minion fails when any of its 1/5/15-minute load averages exceeds
        the corresponding limit.
    '''

    agent = "load average status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='status.loadavg',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    limits = { '1-min': float(la1), '5-min': float(la5), '15-min': float(la15) }
    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        la_json = out[minion]['ret']
        for interval, limit in limits.items():
            if float(la_json[interval]) > limit and minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
771
772
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=[], ignore_dead=False, **kwargs):

    ''' Verify netdev rx/tx drop status.

        Collects per-device rx_drop/tx_drop counters above the limits into
        a nested { minion: { device: { counter: value } } } structure, which
        doubles as the failure report.
    '''

    agent = "netdev rx/tx status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='status.netdev',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = {}
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        dev_json = out[minion]['ret']
        for netdev in dev_json:
            if netdev in ignore_devices:
                continue
            rx_drop = int(dev_json[netdev]['rx_drop'])
            if rx_drop > int(rx_drop_limit):
                failed_minions.setdefault(minion, {}).setdefault(netdev, {})['rx_drop'] = rx_drop
            tx_drop = int(dev_json[netdev]['tx_drop'])
            if tx_drop > int(tx_drop_limit):
                failed_minions.setdefault(minion, {}).setdefault(netdev, {})['tx_drop'] = tx_drop

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
815
816
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):

    ''' Verify available memory status.

        A minion fails when its used-memory percentage
        (MemTotal - MemAvailable) exceeds used_limit.
    '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='status.meminfo',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        mem_avail = int(out[minion]['ret']['MemAvailable']['value'])
        mem_total = int(out[minion]['ret']['MemTotal']['value'])
        # Use a float literal so the ratio keeps its fractional part under
        # Python 2 as well: the previous expression performed integer
        # division and truncated to a whole percent before float().
        used_pct = (mem_total - mem_avail) * 100.0 / mem_total
        if used_pct > float(used_limit):
            if minion not in failed_minions:
                failed_minions.append(minion)
        else:
            verified_minions.append( { minion : str(used_pct) + '%' } )

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
851
852
def ntp_status(params = ['-4', '-p', '-n']):

    ''' JSON formatted ntpq command output.

        Runs ``ntpq`` with the given CLI params and parses the peers table
        into { peer_address: { stratum, source_type, state, current, ... } }.
        The leading tally character of each row is translated into a state
        dict; rows without a known tally get the synthetic indicator 'f'.
    '''

    # Mapping of ntpq tally codes to human-readable selection states.
    ntp_states = [
      { 'indicator': '#', 'comment': 'source selected, distance exceeds maximum value' },
      { 'indicator': 'o', 'comment': 'source selected, Pulse Per Second (PPS) used' },
      { 'indicator': '+', 'comment': 'source selected, included in final set' },
      { 'indicator': 'x', 'comment': 'source false ticker' },
      { 'indicator': '.', 'comment': 'source selected from end of candidate list' },
      { 'indicator': '-', 'comment': 'source discarded by cluster algorithm' },
      { 'indicator': '*', 'comment': 'current time source' },
      { 'indicator': ' ', 'comment': 'source discarded high stratum, failed sanity' }
    ]
    ntp_state_indicators = []
    for state in ntp_states:
        ntp_state_indicators.append(state['indicator'])
    # 't' column values to descriptions.
    source_types = {}
    source_types['l'] = "local (such as a GPS, WWVB)"
    source_types['u'] = "unicast (most common)"
    source_types['m'] = "multicast"
    source_types['b'] = "broadcast"
    source_types['-'] = "netaddr"

    proc = subprocess.Popen(['ntpq'] + params, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    ntp_lines = stdout.split('\n')
    # Header row gives the column names; rename terse ones.
    fields = re.sub("\s+", " ", ntp_lines[0]).split()
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    # Skip header and separator rows (first two lines).
    for line in ntp_lines[2:]:
        if len(line.strip()) > 0:
            element = {}
            values = re.sub("\s+", " ", line).split()
            for i in range(len(values)):
                if fields[i] == 'source_type':
                    element[fields[i]] = { 'indicator': values[i], 'comment': source_types[values[i]] }
                elif fields[i] in ['stratum', 'when', 'poll', 'reach']:
                    # '-' means "no value yet"; normalize to -1.
                    if values[i] == '-':
                        element[fields[i]] = int(-1)
                    else:
                        element[fields[i]] = int(values[i])
                elif fields[i] in ['delay', 'offset', 'jitter']:
                    element[fields[i]] = float(values[i])
                else:
                    element[fields[i]] = values[i]
            # First char of the remote column is the tally code (if known).
            peer = element.pop('remote')
            peer_state = peer[0]
            if peer_state in ntp_state_indicators:
                peer = peer[1:]
            else:
                peer_state = 'f'
            element['current'] = False
            if peer_state == '*':
                element['current'] = True
            for state in ntp_states:
                if state['indicator'] == peer_state:
                    element['state'] = state.copy()
                if peer_state == 'f' and state['indicator'] == ' ':
                    # Unknown tally: reuse the 'discarded' description with
                    # a synthetic 'f' indicator.
                    fail_state = state.copy()
                    fail_state.pop('indicator')
                    fail_state['indicator'] = 'f'
                    element['state'] = fail_state
            ntp_peers[peer] = element

    return ntp_peers
922
923
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify NTP peers status.

        A minion passes when it has at least min_peers peers whose stratum
        does not exceed max_stratum.
    '''

    agent = "ntpd peers status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.ntp_status',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        ntp_json = out[minion]['ret']
        good_peers = [peer for peer in ntp_json if ntp_json[peer]['stratum'] <= int(max_stratum)]
        if len(good_peers) >= int(min_peers):
            if minion not in verified_minions:
                verified_minions.append(minion)
        elif minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +0200962
963
def gluster_pool_list():

    ''' JSON formatted GlusterFS pool list command output:
        { uuid: { hostname: ..., state: ... } } '''

    proc = subprocess.Popen(['gluster', 'pool', 'list'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    regex = re.compile('^(\S+)\s+(\S+)\s+(\S+)$')
    lines = stdout.split('\n')
    # Header row provides the field names (UUID, Hostname, State).
    fields = regex.findall(lines[0])[0]

    pool = {}
    for line in lines[1:]:
        stripped = line.strip()
        if not stripped:
            continue
        values = regex.findall(stripped)[0]
        peer = dict(zip([field.lower() for field in fields], values))
        pool[peer.pop('uuid')] = peer

    return pool
986
987
def gluster_volume_status():

    ''' JSON formatted GlusterFS volumes status command output.

        Parses `gluster volume status all detail` line by line into
        { volume_name: { 'bricks': [ { host, path, <detail fields>... } ] } }.
        Detail key/value lines following a "Brick" separator are attached to
        the most recently seen brick.
    '''

    proc = subprocess.Popen(['gluster', 'volume', 'status', 'all', 'detail'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    # Simple line-oriented state machine over the detail output.
    begin_volume = False
    brick_lookup = False
    volumes = {}
    volume_name = ""

    for line in stdout.split('\n'):
        if 'Status of volume' in line:
            volume_name = line.split(':')[1].strip()
            volumes[volume_name] = { 'bricks': [] }
            begin_volume = True
        elif len(line.strip()) == 0:
            if begin_volume:
                begin_volume = False
        elif '--------' in line:
            # Separator line announces a new brick section.
            brick_lookup = True
        elif brick_lookup and line.split(':')[0].strip() == 'Brick':
            brick_host, brick_path = re.findall('^Brick\ *:\ (.*)', line)[0].split()[1].split(':')
            volumes[volume_name]['bricks'].append({ 'host': brick_host, 'path': brick_path })
            brick_lookup = False
        else:
            # NOTE(review): assumes every remaining line is a single
            # "Key : Value" pair; a line with more than one ':' would raise
            # ValueError here — confirm detail output never produces one.
            brick_key, brick_value = line.split(':')
            brick_key = brick_key.strip().lower().replace(' ', '_')
            brick_value = brick_value.strip()
            volumes[volume_name]['bricks'][len(volumes[volume_name]['bricks']) - 1][brick_key] = brick_value

    return volumes
1021
1022
def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):

    ''' Check GlusterFS peer status '''

    agent = "glusterfs peer status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.gluster_pool_list',
                                    timeout=3,
                                    kwargs='[batch=True]'
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for node in out:
        verified_minions.append(node)
        pool = out[node]['ret']
        # A node fails when any peer is not 'Connected' or the number of
        # connected peers falls below the expected pool size.
        connected = [peer for peer in pool if pool[peer]['state'] == 'Connected']
        if len(connected) != len(pool) or len(connected) < expected_size:
            if node not in failed_minions:
                failed_minions.append(node)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1063
1064
def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=None, ignore_dead=False, **kwargs):

    ''' Check GlusterFS volumes status

    :param target: minion target expression for glusterfs servers.
    :param target_type: salt targeting type for ``target``.
    :param expected_size: minimum number of (online) bricks per volume.
    :param ignore_volumes: optional list of volume names to skip.
    :param ignore_dead: tolerate minions that did not answer.
    :return: True when all checked volumes on all minions are healthy.
    '''

    # Avoid the shared mutable-default pitfall: normalize to a fresh list.
    if ignore_volumes is None:
        ignore_volumes = []

    agent = "glusterfs volumes status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.gluster_volume_status',
                                    timeout=3,
                                    kwargs='[batch=True]'
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    verified_volumes = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        for volume in gluster_json:
            if volume in ignore_volumes:
                continue
            verified_volumes.append(volume)
            if 'bricks' not in gluster_json[volume]:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                # Without brick data the remaining checks cannot run;
                # previously this fell through and raised KeyError.
                continue
            bricks = gluster_json[volume]['bricks']
            if len(bricks) < expected_size:
                if minion not in failed_minions:
                    failed_minions.append(minion)
            alive_bricks = 0
            for brick in bricks:
                if brick['online'] == 'Y':
                    alive_bricks += 1
                else:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
            if alive_bricks < expected_size:
                if minion not in failed_minions:
                    failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Verified minions:")
        logger.info(verified_minions)
        logger.info("Verified volumes:")
        logger.info(verified_volumes)

    return True
1121
1122
def ceph_cmd(cmd):

    ''' Run a ceph CLI command and return its JSON-decoded output '''

    args = ['ceph'] + cmd.split() + ['--format', 'json-pretty']
    raw_output = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]
    return json.loads(raw_output)
1131
1132
def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):

    ''' Check all ceph monitors health status

    :param target: minion target expression for ceph monitors.
    :param target_type: salt targeting type for ``target``.
    :param expected_status: required health status (e.g. HEALTH_OK).
    :param expected_state: required pg state (e.g. active+clean).
    :param ignore_dead: tolerate minions that did not answer.
    :return: True when every monitor reports a fully healthy cluster.
    '''

    agent = "ceph health status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.ceph_cmd',
                                    arg=['status'],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []

    def _flag(minion):
        # Record a minion as failed exactly once.
        if minion not in failed_minions:
            failed_minions.append(minion)

    for minion in out:
        verified_minions.append(minion)
        ceph_json = out[minion]['ret']

        if ceph_json['health']['overall_status'] != expected_status:
            _flag(minion)

        # Hoist the nested osdmap lookup; fail on full/nearfull flags and
        # on any osd that is not both 'in' and 'up'.
        osdmap = ceph_json['osdmap']['osdmap']
        if osdmap['full']:
            _flag(minion)
        if osdmap['nearfull']:
            _flag(minion)
        if not ( osdmap['num_osds'] == osdmap['num_in_osds'] == osdmap['num_up_osds'] ):
            _flag(minion)

        # All known monitors must be present in the quorum.
        quorum = len(ceph_json['quorum'])
        quorum_names = len(ceph_json['quorum_names'])
        mons = len(ceph_json['monmap']['mons'])
        if not ( quorum == quorum_names == mons ):
            _flag(minion)

        for mon in ceph_json['health']['timechecks']['mons']:
            if mon['health'] != expected_status:
                _flag(minion)

        for srv in ceph_json['health']['health']['health_services']:
            for mon in srv['mons']:
                if mon['health'] != expected_status:
                    _flag(minion)

        # Every placement group must be in the expected state.
        for state in ceph_json['pgmap']['pgs_by_state']:
            if state['state_name'] != expected_state:
                _flag(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Quorum:")
        # NOTE: ceph_json is the status of the last minion iterated.
        logger.info(ceph_json['quorum_names'])
        logger.info("Verified minions:")
        logger.info(verified_minions)

    return True
1209
1210
def get_entropy():

    ''' Retrieve entropy size for the host '''

    # The kernel exposes the available entropy pool size (in bits) here.
    with open('/proc/sys/kernel/random/entropy_avail', 'r') as entropy_file:
        return entropy_file.read()
1218
1219
def entropy_check(target='*', target_type='glob', minimum_bits=700, ignore_dead=False, **kwargs):

    ''' Check entropy size in cluster

    :param target: minion target expression (default: all minions).
    :param target_type: salt targeting type for ``target``.
    :param minimum_bits: minimal acceptable entropy pool size in bits.
    :param ignore_dead: tolerate minions that did not answer.
    :return: True when every minion has enough entropy, False otherwise.
    '''

    agent = "entropy size status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.get_entropy',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        entropy = int(out[minion]['ret'])
        if entropy < minimum_bits:
            # PEP 8 idiom: 'x not in y' instead of 'not x in y'.
            if minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1253
1254
def docker_registry_list(host):

    ''' Retrieve and list docker catalog

    :param host: registry endpoint, with or without an http(s) scheme.
    :return: dict mapping repository name -> list of tags;
             empty dict on any error (best-effort).
    '''

    try:
        if host[0:4] == 'http':
            url = host + '/v2/'
        else:
            url = 'http://' + host + '/v2/'
        repos = requests.get(url + '_catalog')

        versions = {}
        for repo in repos.json()['repositories']:
            repo_versions = requests.get(url + repo + '/tags/list')
            versions[repo] = repo_versions.json().pop('tags')
        return versions
    except Exception:
        # Narrowed from a bare 'except:' so SystemExit/KeyboardInterrupt
        # are no longer swallowed; still best-effort on any other error.
        return {}
Dzmitry Stremkouski7cd10fc2019-04-17 11:51:59 +02001273
1274
def docker_ps(list_all=0):

    ''' List containers via the local docker unix socket '''

    import docker
    cli = docker.client.Client(base_url='unix://var/run/docker.sock')
    return cli.containers(all=list_all)
1280
Dzmitry Stremkouski2c709f22019-04-22 02:27:54 +02001281
def zookeeper_cmd(cmd, hostname='localhost', port=2181):

    ''' Execute zookeeper cmd via socket '''

    chunk_size = 1024
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.connect((hostname, port))
    conn.sendall(cmd)
    # Half-close the write side so zookeeper knows the command is complete.
    conn.shutdown(socket.SHUT_WR)
    chunks = []
    while True:
        chunk = conn.recv(chunk_size)
        if chunk == "":
            break
        chunks.append(chunk)
    conn.close()
    return "".join(chunks)
1299
1300
def zookeeper_stats():

    ''' Retrieve zookeeper stats

    Aggregates the zookeeper four-letter admin commands 'conf', 'envi',
    'srvr' and 'cons' into one dict with the sections 'configuration',
    'environment', 'server' and 'clients'.  Digit-only values become
    ints; key names are lower-cased with spaces turned to underscores.
    '''

    stats = {}
    # --- 'conf' section: one 'key=value' pair per line -------------------
    stats['configuration'] = {}
    for line in zookeeper_cmd('conf').split('\n'):
        if line:
            key, value = line.split('=')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['configuration'][key.strip().lower().replace(' ', '_')] = value

    # --- 'envi' section: 'key=value' pairs; [1:] skips the header line ---
    stats['environment'] = {}
    for line in zookeeper_cmd('envi').split('\n')[1:]:
        if line:
            key, value = line.split('=')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['environment'][key.strip().lower().replace(' ', '_')] = value

    # --- 'srvr' section: 'key: value' pairs with two special-cased lines -
    stats['server'] = {}
    for line in zookeeper_cmd('srvr').split('\n'):
        if line:
            # Version line: keep the raw string or, when it looks like
            # dotted numbers, a list of ints (e.g. [3, 4, 10]).
            if re.findall('^Zookeeper version:', line, flags=re.IGNORECASE):
                version_str = line.split(':')[1].strip()
                version = version_str
                if '-' in version_str:
                    # Drop the build suffix after the dash.
                    version_str = version_str.split('-')[0]
                if '.' in version_str:
                    version = []
                    version_list = version_str.split('.')
                    for elem in version_list:
                        if elem.strip().isdigit():
                            version.append(int(elem))
                stats['server']['version'] = version
                continue
            # Latency line packs three numbers into one 'min/avg/max' value.
            if re.findall('^Latency min/avg/max:', line, flags=re.IGNORECASE):
                latency_min, latency_avg, latency_max = line.split(':')[1].strip().split('/')
                stats['server']['latency'] = {'min':int(latency_min),'max':int(latency_max),'avg':int(latency_avg)}
                continue
            key, value = line.split(':')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['server'][key.strip().lower().replace(' ', '_')] = value

    # --- 'cons' section: one connection per line, e.g. -------------------
    #   /10.0.0.1:54321[1](queued=0,recved=5,sent=5)
    stats['clients'] = {}
    for line in zookeeper_cmd('cons').split('\n'):
        if line:
            # Captured groups after the leading slash: address,
            # ':port[direction]', and the parenthesized 'key=value' list.
            clients = re.findall('^(\s*\/)(.+)(:\d+\[\d+\])(\(.+\))$', line)[0][1:]
            addr = clients[0]
            port, direction = re.findall('^(\d+)\[(\d+)\]$', clients[1][1:])[0]
            client = '['+addr+']:'+str(port)
            stats['clients'][client] = {'direction': int(direction)}
            for elem in clients[2][1:-1].split(','):
                key, value = elem.split('=')
                if value.strip().isdigit():
                    value = int(value)
                else:
                    value = value.strip()
                stats['clients'][client][key.strip().lower().replace(' ', '_')] = value

    return stats
1370
1371
def get_zookeeper_leader(target='I@opencontrail:control', target_type='compound', ignore_dead=False, **kwargs):

    ''' Retrieve zookeeper leader '''

    agent = "zookeeper leader retrieve"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.zookeeper_stats',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    leader = None
    for minion in out:
        # The minion whose server mode reports 'leader' wins; a later
        # match overrides an earlier one, as in the original scan.
        if out[minion]['ret']['server']['mode'] == 'leader':
            leader = minion

    return leader
1395
1396
def contrail_vrouter_list(api_host='127.0.0.1', api_port=9100):

    ''' Retrieve and list contrail vrouters.
        Valid targets: Contrail controllers.
    '''

    try:
        prefix = '' if api_host[0:4] == 'http' else 'http://'
        url = prefix + api_host + ':' + str(api_port)

        vrouter_list = []
        vrouters = requests.get(url + '/virtual-routers').json()
        for vr in vrouters['virtual-routers']:
            # Pick the last fq_name element that is not the global config
            # namespace; that element is the vrouter name.
            for name in vr['fq_name']:
                if name != "default-global-system-config":
                    vr_name = name
            vrouter_list.append({'name': vr_name, 'uuid': vr['uuid']})
        return vrouter_list

    except:
        # Best-effort: any failure yields an empty dict.
        return {}
1423
1424
def contrail_vrouter_show(vr_uuid, api_host='127.0.0.1', api_port=9100):

    ''' Retrieve contrail vrouter data
        Valid targets: Contrail controllers.
    '''

    try:
        prefix = '' if api_host[0:4] == 'http' else 'http://'
        endpoint = prefix + api_host + ':' + str(api_port) + '/virtual-router/' + vr_uuid
        return requests.get(endpoint).json()
    except:
        # Best-effort: any failure yields an empty dict.
        return {}
1441
1442
def _xmletree_descend_child(given_child, tag_requested):

    ''' Returns xmletree subelement by tag name '''

    # Scan direct children; fall back to an empty dict when no tag matches.
    for subelement in given_child:
        if subelement.tag == tag_requested:
            return subelement
    return {}
1455
1456
def contrail_vrouter_agent_status(api_host='127.0.0.1', api_port=8085):

    ''' Retrieve contrail vrouter agent status

    :param api_host: agent introspect host, with or without http scheme.
    :param api_port: agent introspect port.
    :return: dict with agent 'state' and 'connections' on success,
             or a diagnostic string on any failure.
    '''

    import xml.etree.ElementTree as ET

    if api_host[0:4] == 'http':
        url = api_host + ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    try:
        req = requests.get(url + '/Snh_SandeshUVECacheReq?x=NodeStatus')
        if int(req.status_code) != 200:
            return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))
    except Exception as e:
        # Previously this path did 'pass' and crashed below with NameError
        # because 'req' was never bound; report the error instead.
        return "Could not fetch data from vrouter agent via %s: %s" % (url, str(e))

    try:
        xmletree = ET.fromstring(req.text)
    except Exception:
        return "Could not parse xml tree %s" % str(req.text)

    try:
        vrouter_data = {}
        # Descend to the ProcessStatus element carrying state/connections.
        child = _xmletree_descend_child(xmletree, 'NodeStatusUVE')
        child = _xmletree_descend_child(child, 'data')
        child = _xmletree_descend_child(child, 'NodeStatus')
        child = _xmletree_descend_child(child, 'process_status')
        child = _xmletree_descend_child(child, 'list')
        child = _xmletree_descend_child(child, 'ProcessStatus')
        vrouter_data['state'] = _xmletree_descend_child(child, 'state').text
        vrouter_data['connections'] = []
        child = _xmletree_descend_child(child, 'connection_infos')
        for elem in _xmletree_descend_child(child, 'list'):
            conn = {}
            conn['type'] = _xmletree_descend_child(elem,'type').text
            conn['name'] = _xmletree_descend_child(elem,'name').text
            conn['status'] = _xmletree_descend_child(elem,'status').text
            conn['description'] = _xmletree_descend_child(elem,'description').text
            conn['server_addrs'] = []
            server_addrs = _xmletree_descend_child(elem,'server_addrs')
            for srv in _xmletree_descend_child(server_addrs,'list'):
                host, port = srv.text.split(':')
                conn['server_addrs'].append({'host': host, 'port': port})
            vrouter_data['connections'].append(conn)
        return vrouter_data
    except Exception:
        return "Unsupported xml tree for this function %s" % str(req.text)
1506
1507
def contrail_collector_agent_status(vr_name, api_host='auto', api_port=9081):

    ''' Retrieve contrail vrouter agent status from analyticsdb '''

    # Resolve the analytics endpoint base; 'auto' looks up the local
    # analytics address from pillar.
    if api_host[0:4] == 'http':
        base = api_host
    elif api_host == 'auto':
        analytics_ip = __salt__['pillar.get']('_param:opencontrail_analytics_address')
        base = 'http://' + analytics_ip
    else:
        base = 'http://' + api_host
    url = base + ':' + str(api_port)

    req = requests.get(url + '/analytics/uves/vrouter/' + vr_name + '?flat')
    if int(req.status_code) != 200:
        return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    return json.loads(req.text)
1525
1526
def contrail_control_peers_summary(api_host='auto', api_port=8083):

    ''' Retrieve contrail control peers summary '''

    import xml.etree.ElementTree as ET

    # Resolve the control introspect base; 'auto' means localhost.
    if api_host[0:4] == 'http':
        base = api_host
    elif api_host == 'auto':
        base = 'http://' + '127.0.0.1'
    else:
        base = 'http://' + api_host
    url = base + ':' + str(api_port)

    req = requests.get(url + '/Snh_ShowBgpNeighborSummaryReq')
    if int(req.status_code) != 200:
        return "Could not fetch data from contrail control via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    summary = req.text
    peers = []

    try:
        # Each child of the first <list> element describes one BGP peer.
        for neighbor in ET.fromstring(summary).find('.//list'):
            peers.append({field.tag: field.text for field in neighbor})
    except:
        return "Could not parse xml tree %s" % str(summary)

    return peers
1559
1560
def contrail_control_peer_status(api_host='auto', api_port=8083, fields=default_peer_filter):

    ''' Contrail control peer status '''

    peer_status = {}

    for summary_elem in contrail_control_peers_summary():
        # Keep only the attributes requested via 'fields', keyed by peer.
        filtered = {key: summary_elem[key] for key in summary_elem if key in fields}
        peer_status[summary_elem["peer"]] = filtered

    return peer_status
1577
1578
def _get_object(json_obj, obj_path):

    ''' Retrieve subelement of a JSON object or value

    :param json_obj: decoded JSON structure (dict/list/value); nested
                     JSON may still be a string and is decoded on demand.
    :param obj_path: colon-separated path; a numeric component indexes a
                     list, '*' expands to all list elements.
    :return: structure mirroring the path with the resolved value(s).
    '''

    # Python 2/3 compatible set of text types ('unicode' only exists on 2,
    # so referencing it directly crashed under Python 3).
    try:
        text_types = (str, unicode)
    except NameError:
        text_types = (str,)

    if ':' in obj_path:
        splitter = obj_path.split(':')
        k = splitter[0]
        v = ':'.join(splitter[1:])
        if k.isdigit():
            # Return specific element path
            return [ _get_object(json_obj[int(k)], v) ]
        elif k == '*':
            l = []
            for el in json_obj:
                l.append(_get_object(el, v))
            # Return all list elements from the path
            return l
        else:
            # Contrail output may have nested JSON
            if isinstance(json_obj, text_types):
                json_obj = json.loads(json_obj)
            # Assume dict. Return the resolved subtree under this key.
            return { k: _get_object(json_obj[k], v) }
    else:
        return { obj_path: json_obj[obj_path] }
1604
1605
def _deepmerge(o1, o2):

    ''' Deep merge JSON objects

    Values from o2 win on conflict; dicts merge key-by-key, sequences
    merge element-by-element (by position, assuming equal length).
    Mismatched or scalar types resolve to o2.
    '''

    o3 = {}
    if type(o1) == type(o2):
        if type(o1) == dict:
            # set union works on both Python 2 and 3 (dict.keys() objects
            # cannot be concatenated with '+' on Python 3).
            for k in set(o1) | set(o2):
                if k in o1:
                    if k in o2:
                        o3[k] = _deepmerge(o1[k], o2[k])
                    else:
                        o3[k] = o1[k]
                else:
                    o3[k] = o2[k]
        elif type(o1) == list or type(o1) == tuple:
            # Merge positionally; enumerate avoids the wrong-slot bug that
            # list.index() caused for duplicate elements, and tuples no
            # longer crash on the dict branch's .keys() call.
            merged = list(o2)
            for i in range(len(merged)):
                merged[i] = _deepmerge(o1[i], o2[i])
            o3 = tuple(merged) if type(o1) == tuple else merged
        else:
            # Scalars, sets and anything else: o2 wins.
            o3 = o2
    else:
        o3 = o2

    return o3
1632
1633
def contrail_vrouter_agent_info(vr_name, filter_map=default_vrouter_info_map):

    ''' Retrieve filtered contrail vrouter agent info from analyticsdb '''

    agent_status = contrail_collector_agent_status(vr_name)
    result = {}
    for section in filter_map:
        result[section] = {}
        for attr_path in filter_map[section]:
            # Merge each extracted attribute subtree into the section.
            extracted = { section: _get_object(agent_status[section], attr_path) }
            result = _deepmerge(result, extracted)

    return result
1646
1647
def kafka_brokers_ids():

    ''' Retrieve kafka brokers ids '''

    ids = []
    for record in zookeeper_cmd('dump').split('\n'):
        # Broker registrations appear as /brokers/ids/<id> znodes.
        if record and '/brokers/ids/' in record:
            ids.append(int(record.split('/')[3]))

    return ids
1659
1660
def libvirt_capabilities():

    ''' JSON formatted libvirt capabilities list

    Runs 'virsh capabilities' and returns a dict mapping guest arch name
    to its non-canonical machine types, or a diagnostic string on error.
    '''

    import xml.etree.ElementTree as ET

    # Pre-initialize so the error message below cannot NameError when
    # Popen itself fails (e.g. virsh is not installed).
    stdout = ''
    try:
        proc = subprocess.Popen(['virsh', 'capabilities'], stdout=subprocess.PIPE)
        stdout, stderr = proc.communicate()
        xmletree = ET.fromstring(stdout)
    except Exception:
        return "Could not parse xml tree %s" % str(stdout)

    try:
        capabilities = {}
        for elem in xmletree:
            if elem.tag == "guest":
                for el in elem:
                    if el.tag == 'arch':
                        _name = el.attrib['name']
                        capabilities[_name] = []
                        for arch in el:
                            if arch.tag == 'machine':
                                # Skip aliases that carry a canonical name.
                                if 'canonical' not in arch.attrib:
                                    capabilities[_name].append(arch.text)

        return capabilities
    except Exception:
        return "Unsupported xml tree for this function %s" % str(stdout)
1690
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001691
def keystone_keys_attractor(keys_dir='/var/lib/keystone/fernet-keys', keys_ids=range(0,-4,-1)):

    ''' JSON formatted dict of keystone keys sha256 sums

    Each selected key file is hashed and the digest re-hashed twice more,
    so results can be compared across nodes without exposing key material.

    :param keys_dir: directory holding the keystone key files.
    :param keys_ids: indices (into the sorted file list) of keys to hash;
                     negative values select the newest keys.
    :return: dict mapping key file name -> triple sha256 hex digest;
             best-effort, partial or empty on error.
    '''

    keys = os.listdir(keys_dir)
    keys.sort()
    keys_dict = {}
    try:
        for i in keys_ids:
            # Read raw bytes and hash the hex digests as ASCII bytes so the
            # chain works on both Python 2 and Python 3 (sha256 rejects
            # unicode text on Python 3).
            with open("%s/%s" % (keys_dir, str(keys[i])), 'rb') as key_file:
                _iter1 = hashlib.sha256(key_file.read()).hexdigest()
            _iter2 = hashlib.sha256(_iter1.encode('ascii')).hexdigest()
            _iter3 = hashlib.sha256(_iter2.encode('ascii')).hexdigest()
            keys_dict[str(keys[i])] = _iter3
    except Exception:
        # Best-effort: return whatever was collected so far.
        pass

    return keys_dict
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001710
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001711
def keystone_keys_check(target='I@keystone:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Check cluster keystone keys are in sync '''

    keys_type = kwargs.get("keys_type", 'fernet')

    supported_key_types = ['fernet', 'credential']
    if keys_type not in supported_key_types:
        logger.error("Unsupported keys type: %s" % str(keys_type))
        logger.error("Supported keys type are: %s" % str(supported_key_types))
        __context__['retcode'] = 2
        return False

    agent = "keystone %s keys sync" % keys_type
    keys_dir = kwargs.get("keys_dir", '/var/lib/keystone/%s-keys' % keys_type)

    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.keystone_keys_attractor',
                                    arg=["keys_dir='%s'" % keys_dir],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    distinct_attractors = []
    failed_minions = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        fingerprint = out[minion]['ret']
        # An empty attractor means the minion could not read its keys.
        if fingerprint == {}:
            failed_minions.append(minion)
        if fingerprint not in distinct_attractors:
            distinct_attractors.append(fingerprint)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    # More than one distinct fingerprint means the cluster keys have
    # diverged: every minion is considered failed.
    if len(distinct_attractors) > 1:
        failed_minions = list(out)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("%s check done." % agent)
        logger.info(verified_minions)

    return True
1771