blob: 97d1703213c279db6ca67b29b5439ffa273eff03 [file] [log] [blame]
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02001import datetime
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02002import hashlib
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02003import json
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +02004import logging
5import os
6import re
Dzmitry Stremkouski9d2a5562019-08-08 19:56:38 +02007import requests
8import salt.utils
9import socket
10import subprocess
Dzmitry Stremkouski36290202019-05-05 21:26:25 +020011import yaml
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +020012
__author__ = "Dzmitry Stremkouski"
__copyright__ = "Copyright 2019, Mirantis Inc."
__license__ = "Apache 2.0"

# Module-level logger; the extra StreamHandler mirrors messages to stderr so
# check results are visible when the module is invoked from the salt CLI.
logger = logging.getLogger(__name__)
stream = logging.StreamHandler()
logger.addHandler(stream)
20
# Prefer libyaml's C-accelerated loader/dumper when it is compiled in;
# fall back to the pure-python implementation otherwise.
try:
    from yaml import CLoader as Loader, CDumper as Dumper
except ImportError:
    from yaml import Loader, Dumper

# Default selection of vrouter attributes to extract, keyed by source
# structure (ContrailConfig / VrouterAgent).  Entries appear to be
# colon-separated paths into the nested introspect data, with '*' as a
# wildcard key -- verify against the consumer of this map.
default_vrouter_info_map = yaml.load("""
ContrailConfig:
- deleted
- elements:uuid
- elements:virtual_router_dpdk_enabled
- elements:virtual_router_type
VrouterAgent:
- build_info:build-info:0:build-version
- build_info:build-info:0:build-number
- config_file
- control_ip
- control_node_list_cfg
- dns_server_list_cfg
- dns_servers
- down_interface_count
- eth_name
- headless_mode_cfg
- hostname_cfg
- hypervisor
- mode
- phy_if
- platform
- self_ip_list
- total_interface_count
- tunnel_type
- vhost_cfg
- vhost_if
- vr_limits:max_interfaces
- vr_limits:max_labels
- vr_limits:max_mirror_entries
- vr_limits:max_nexthops
- vr_limits:max_vrfs
- vr_limits:vrouter_max_bridge_entries
- vr_limits:vrouter_max_flow_entries
- vr_limits:vrouter_max_oflow_bridge_entries
- vr_limits:vrouter_max_oflow_entries
- xmpp_peer_list:*:ip
- xmpp_peer_list:*:primary
- xmpp_peer_list:*:status
""", Loader=Loader)

# Default whitelist of peer attributes kept when filtering peer status data.
default_peer_filter = ["encoding", "peer_address", "state"]
68
Dzmitry Stremkouskib71ada92019-04-05 22:37:59 +020069
def _failed_minions(out, agent, failed_minions):

    ''' Report failed minions; return True when the check passed. '''

    # No failures collected -- the check is considered successful.
    if not failed_minions:
        return True

    logger.error("%s check FAILED" % agent)
    logger.error("Some minions returned non-zero exit code or empty data")
    logger.error("Failed minions:" + str(failed_minions))
    for minion in failed_minions:
        logger.error(minion)
        logger.debug(str(out[minion]['ret']))
    __context__['retcode'] = 2
    return False
85
86
def _minions_output(out, agent, ignore_dead, ignore_empty=False):

    ''' Verify minions output and exit code.

    :param out: minion_id -> job result mapping returned by saltutil.cmd
    :param agent: check name used in log messages
    :param ignore_dead: when True, skip the master-side verification that
                        every targeted minion actually answered the job
    :param ignore_empty: when True, an empty 'ret' payload is not a failure
    :return: True if every minion replied with exit code 0 and usable data
    '''

    if not out:
        logger.error("%s check FAILED" % agent)
        logger.error("No response from master cmd")
        __context__['retcode'] = 2
        return False

    if not ignore_dead:
        # py2/py3 compatible: the original out.itervalues().next() is
        # python2-only and raises AttributeError under python3.
        jid = next(iter(out.values()))['jid']
        job_stats = __salt__['saltutil.runner']( 'jobs.print_job', arg=[jid] ) or None
        if not job_stats:
            logger.error("%s check FAILED" % agent)
            logger.error("No response from master runner")
            __context__['retcode'] = 2
            return False

        job_result = job_stats[jid]['Result']
        job_minions = job_stats[jid]['Minions']
        if len(job_minions) != len(job_result):
            logger.error("%s check FAILED" % agent)
            logger.error("Some minions are offline")
            logger.error(list(set(job_minions) - set(job_result.keys())))
            __context__['retcode'] = 2
            return False

    failed_minions = []
    for minion in out:
        if 'retcode' in out[minion]:
            if out[minion]['retcode'] == 0:
                if not ignore_empty:
                    ret = out[minion]['ret']
                    # A bool 'ret' means the remote function itself signalled
                    # failure; an empty payload is unusable either way.
                    if isinstance(ret, bool) or len(ret) == 0:
                        if minion not in failed_minions:
                            failed_minions.append(minion)
            else:
                # Non-zero exit code on the minion side.
                if minion not in failed_minions:
                    failed_minions.append(minion)
        else:
            # No exit code at all -- treat as a failure.
            if minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    return True
138
139
def minions_check(wait_timeout=1, gather_job_wait_timeout=1, target='*', target_type='glob', ignore_dead=False):

    ''' Verify minions are online '''

    agent = "Minions"
    # Ping every targeted minion; test.ping returns a bare True, so empty
    # payload checking is disabled via ignore_empty.
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='test.ping',
        timeout=wait_timeout,
        gather_job_timeout=gather_job_wait_timeout,
    ) or None
    return _minions_output(out, agent, ignore_dead, ignore_empty=True)
153
154
def time_diff_check(time_diff=1, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify time diff on servers '''

    agent = "Time diff"
    # status.time with '%s' returns epoch seconds on each minion.
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.time',
        arg=['%s'],
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    minions_times = {}
    env_times = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        if out[minion]['retcode'] == 0:
            minion_time = int(out[minion]['ret'])
            # Group minions by their reported timestamp for debug output.
            minions_times.setdefault(str(minion_time), []).append(minion)
            env_times.append(minion_time)

    # Spread between the slowest and the fastest clock in the environment.
    diff = max(env_times) - min(env_times)

    if diff > time_diff:
        __context__['retcode'] = 2
        if kwargs.get("debug", False):
            return False, minions_times
        else:
            return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
197
198
def contrail_process_list(**kwargs):

    ''' Retrieve contrail process pids and start_time.

    Parses ``contrail-status -d`` output for per-service pids and computes
    each process' uptime from /proc.  With role=controller the zookeeper and
    ifmap-server sysv services are inspected as well.

    :param role: 'compute' (default) or 'controller'
    :return: dict mapping process name -> {'pid': int, 'uptime': seconds}
    '''

    cmd = ['contrail-status', '-d']

    proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    procs = {}
    for line in stdout.split('\n'):
        # Match lines like: '<name> active pid <pid>, uptime ...'
        if re.findall('^(\S+).*pid ([0-9]+),.*$', line):
            stat = line.split()
            # 4th token is '<pid>,' -- strip the trailing comma.
            procs[stat[0]] = int(stat[3][:-1])

    if kwargs.get('role', 'compute') == 'controller':

        # These controller services are not reported by contrail-status.
        for service in ['zookeeper', 'ifmap-server']:
            cmd = ['service', service, 'status']

            proc = subprocess.Popen(cmd, stdout=subprocess.PIPE)
            stdout, stderr = proc.communicate()

            for line in stdout.split('\n'):
                if re.findall('^(\S+).*process ([0-9]+)$', line):
                    stat = line.split()
                    procs[stat[0]] = int(stat[3])

    ctime = int(datetime.datetime.now().strftime("%s"))
    # System boot time (epoch seconds) from the 'btime' line of /proc/stat;
    # use a context manager instead of leaking the open file handle.
    btime_re = re.compile(r"^btime (\d+)$", re.MULTILINE)
    with open("/proc/stat") as stat_file:
        btime_groups = btime_re.search(stat_file.read())
    btime = int(btime_groups.groups()[0])
    clk_tck = os.sysconf(os.sysconf_names["SC_CLK_TCK"])
    for proc in procs:
        pid = procs[proc]
        try:
            with open('/proc/%s/stat' % str(pid), 'r') as f:
                stat = f.read()
            # 20th field after the '(comm)' marker is the process start time
            # in clock ticks since boot (see proc(5), field 22 'starttime').
            jitty_time = int(stat.split(') ')[1].split()[19]) / clk_tck
            proc_uptime = ctime - btime - int(jitty_time)
        except (IOError, OSError, IndexError, ValueError):
            # Process exited between listing and reading /proc, or the stat
            # line was unparsable.  The original bare 'except:' is narrowed
            # to the errors that can actually occur here; the best-effort
            # fallback of reporting zero uptime is preserved.
            proc_uptime = 0
        procs[proc] = { 'pid': pid, 'uptime': proc_uptime }

    return procs
244
245
def contrail_check(target='I@opencontrail:control or I@opencontrail:collector or I@opencontrail:compute', nodetool_target='I@opencontrail:control or I@opencontrail:collector', compute_target='I@opencontrail:compute', target_type='compound', nodetool_target_type='compound', compute_target_type='compound', nodetool_expected_size=3, proc_min_uptime=30, ignore_dead=False, **kwargs):

    ''' Verify contrail infrastructure.

    Runs up to five sequential checks and fails fast on the first broken one:

    1. ``contrail-status`` output on all targeted nodes
    2. cassandra ``nodetool status`` ring size on control/collector nodes
    3. process uptimes on control nodes (skipped for OpenContrail 4.x)
    4. process uptimes on compute nodes (skipped for OpenContrail 4.x)
    5. duplicate vrouter SNAT namespace detection across computes

    :param nodetool_expected_size: number of 'UN' (Up/Normal) cassandra nodes
                                   each minion must see
    :param proc_min_uptime: minimal acceptable process uptime in seconds
    :param ignore_dead: passed through to _minions_output
    :return: True when all checks pass, False otherwise (retcode set to 2)
    '''

    # OpenContrail 4.x is containerized: commands must be wrapped in the
    # 'doctrail' helper.  Detect it from the pillar-provided version string.
    use_doctrail = False
    oc_ver = str(__salt__['pillar.get']('_param:opencontrail_version'))
    if len(oc_ver) > 1:
        if oc_ver[0] == '4':
            use_doctrail = True

    agent = "Contrail status"
    if use_doctrail:
        # Compute nodes do not use doctrail yet, but are part of the same
        # compound target.  To minimize salt calls, a shell one-liner picks
        # plain contrail-status when the doctrail binary is absent.
        arg_cmd = 'test $(whereis -b doctrail | grep -c " ") -eq 0 && contrail-status || doctrail all contrail-status'
    else:
        arg_cmd = "contrail-status"

    # Check #1: contrail-status on every targeted node
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='cmd.run',
                                    arg=[arg_cmd],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # Acceptable lines: '==' separators, '*'-only lines, empty lines, and
    # '<service> active|backup|inactive (disabled on boot)' statuses.
    pattern = '^(==|\*+$|$|\S+\s+(active|backup|inactive\s\(disabled\son\sboot\)))'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        for line in out[minion]['ret'].split('\n'):
            check_line = True
            # doctrail prints '... FOR NODE ...' banner lines -- skip them.
            if " FOR NODE " in line:
                check_line = False
            if check_line and not prog.match(line) and minion not in failed_minions:
                failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    if use_doctrail:
        arg_cmd = "doctrail all nodetool status"
    else:
        arg_cmd = "nodetool status"

    # Check #2: cassandra ring health via nodetool
    out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                    tgt_type=nodetool_target_type,
                                    fun='cmd.run',
                                    arg=[arg_cmd],
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    # 'UN' prefix marks an Up/Normal cassandra node in nodetool output.
    pattern = '^UN'
    prog = re.compile(pattern)

    validated = []
    for minion in out:
        size = 0
        for line in out[minion]['ret'].split('\n'):
            if prog.match(line):
                size += 1
        # Each minion must see the full expected ring.
        if not size == nodetool_expected_size and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # Check #3: process status on control nodes

    # Contrail 4.x does not produce pid info from contrail-status -d
    # Will skip this check and use another method further
    # TODO: check process list state for oc4 env
    if not use_doctrail:

        out = __salt__['saltutil.cmd']( tgt=nodetool_target,
                                        tgt_type=nodetool_target_type,
                                        fun='health_checks.contrail_process_list',
                                        arg=['role=controller'],
                                        timeout=5
                                      ) or None

        if not _minions_output(out, agent, ignore_dead):
            __context__['retcode'] = 2
            return False

        failed_minions = []
        validated = []
        for minion in out:
            procs = out[minion]['ret']
            for proc in procs:
                proc_uptime = procs[proc]['uptime']
                # A recently (re)started process suggests flapping/crashes.
                if proc_uptime < proc_min_uptime:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                    logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
            validated.append(minion)

        if not _failed_minions(out, agent, failed_minions):
            __context__['retcode'] = 2
            return False
        if kwargs.get("debug", False):
            logger.info(validated)

    # Check #4: process status on compute nodes

    # Contrail 4.x does not produce pid info from contrail-status -d
    # Will skip this check and use another method further
    # TODO: check process list state for oc4 env
    if not use_doctrail:

        out = __salt__['saltutil.cmd']( tgt=compute_target,
                                        tgt_type=compute_target_type,
                                        fun='health_checks.contrail_process_list',
                                        timeout=5
                                      ) or None

        if not _minions_output(out, agent, ignore_dead):
            __context__['retcode'] = 2
            return False

        failed_minions = []
        validated = []
        for minion in out:
            procs = out[minion]['ret']
            for proc in procs:
                proc_uptime = procs[proc]['uptime']
                if proc_uptime < proc_min_uptime:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                    logger.error({'minion': minion, 'name': proc, 'uptime': proc_uptime})
            validated.append(minion)

        if not _failed_minions(out, agent, failed_minions):
            __context__['retcode'] = 2
            return False
        if kwargs.get("debug", False):
            logger.info(validated)

    # Check #5: compute vrouter namespace duplicates check
    out = __salt__['saltutil.cmd']( tgt=compute_target,
                                    tgt_type=compute_target_type,
                                    fun='health_checks.list_namespaces',
                                    timeout=5
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []
    # Namespace uuids must be unique across the whole compute fleet; a
    # repeat indicates a duplicated SNAT vrouter.
    all_namespaces = []
    for minion in out:
        namespaces = out[minion]['ret']
        for ns in namespaces:
            if ns['uuid'] not in all_namespaces:
                all_namespaces.append(ns['uuid'])
            else:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                logger.error({'minion': minion, 'uuid': ns['uuid']})
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        logger.error("Duplicated SNAT vrouters found. Please reset their gateways")
        __context__['retcode'] = 2
        return False
    if kwargs.get("debug", False):
        logger.info(validated)

    # TODO: peers check
    return True
438
439
def galera_check(cluster_size=3, target='I@galera:master or I@galera:slave', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify galera cluster size and state '''

    agent = "Galera status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='mysql.status',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    validated = []

    for minion in out:
        status = out[minion]['ret']
        # Every node must see the full cluster and report OPERATIONAL state.
        wrong_size = int(status['wsrep_cluster_size']) != int(cluster_size)
        bad_state = status['wsrep_evs_state'] != 'OPERATIONAL'
        if (wrong_size or bad_state) and minion not in failed_minions:
            failed_minions.append(minion)
        validated.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(validated)
        logger.info("Cluster size: " + str(out[validated[0]]['ret']['wsrep_cluster_size']))
        logger.info("Cluster state: " + str(out[validated[0]]['ret']['wsrep_evs_state']))
    return True
474
475
476def _quote_str(s, l=False, r=False):
477
478 ''' Quting rabbitmq erl objects for json import '''
479
480 if len(s) > 0:
481 if l:
482 s = s.lstrip()
483 if r:
484 s = s.rstrip()
485 if (s[0] == "'") and (s[-1] != "'") and r and not l:
486 s += "'"
487 if (s[0] == '"') and (s[-1] != '"') and r and not l:
488 s += '"'
489 if (s[-1] == "'") and (s[0] != "'") and l and not r:
490 s = "'" + s
491 if (s[-1] == '"') and (s[0] != '"') and l and not r:
492 s = '"' + s
493 if (s[-1] != "'") and (s[-1] != '"') and (s[0] != "'") and (s[0] != '"'):
494 s = '"' + s.replace('"', '\\\"') + '"'
495 else:
496 if (not l) and (not r) and s[0] != '"' and not s[-1] != '"':
497 s= s.replace('"', '\\\"')
498 return s.replace("'", '"')
499 else:
500 return s
501
502
def _sanitize_rmqctl_output(string):

    ''' Sanitizing rabbitmq erl objects for json import.

    Rewrites the erlang-term output of ``rabbitmqctl`` (proplists such as
    ``[{running_nodes,[rabbit@node1]}, ...]``) chunk by chunk into a string
    that ``json.loads`` can parse.
    '''

    rabbitctl_json = ""
    # Process the erlang output comma-separated chunk by chunk: split each
    # chunk into leading brackets/whitespace (left), the bare token (mid)
    # and trailing brackets/whitespace (right), then quote the bare token.
    for line in string.split(','):
        copy = line
        left = ""
        right = ""
        mid = copy
        lpar = False
        rpar = False
        # Peel off leading '[', '{' and whitespace.
        if re.search('([\[\{\s]+)(.*)', copy):
            mid = re.sub('^([\[\{\s]+)','', copy)
            left = copy[:-len(mid)]
            copy = mid
            lpar = True
        # Peel off trailing ']', '}' and whitespace.
        if re.search('(.*)([\]\}\s]+)$', copy):
            mid = re.sub('([\]\}\s]+)$','', copy)
            right = copy[len(mid):]
            copy = mid
            rpar = True
        result = left + _quote_str(mid, l=lpar, r=rpar) + right
        # An erlang tuple '{key, value}' maps to a JSON '"key": value' pair,
        # so the first element right after '{' is followed by ':' not ','.
        if (not rpar) and lpar and (len(left.strip()) > 0) and (left.strip()[-1] == '{'):
            result += ":"
        else:
            result += ","
        rabbitctl_json += result

    # Drop the trailing separator appended by the loop above.
    rabbitctl_json = rabbitctl_json[:-1]
    new_rabbitctl_json = rabbitctl_json
    # Escape backslashes and stray double quotes inside quoted scalar values
    # (e.g. version strings) that would otherwise break json.loads().
    for s in re.findall('"[^:\[{\]}]+"\s*:\s*("[^\[{\]}]+")', rabbitctl_json):
        if '"' in s[1:][:-1]:
            orig = s
            changed = '"' + s.replace('\\', '\\\\').replace('"', '\\\"') + '"'
            new_rabbitctl_json = new_rabbitctl_json.replace(orig, changed)
    return new_rabbitctl_json
540
541
def rabbitmq_list_queues(vhost='/'):

    ''' JSON formatted RabbitMQ queues list '''

    output, _ = subprocess.Popen(
        ['rabbitmqctl', 'list_queues', '-p', vhost],
        stdout=subprocess.PIPE).communicate()

    queues = {}
    for row in output.split('\n'):
        # Data rows end with the message counter; header/footer rows do not.
        if re.findall('[0-9]$', row):
            name, count = re.sub(r"\s+", " ", row).split()
            queues[name] = int(count)

    return queues
556
557
def rabbitmq_list_vhosts():

    ''' JSON formatted RabbitMQ vhosts list '''

    output, _ = subprocess.Popen(
        ['rabbitmqctl', 'list_vhosts'],
        stdout=subprocess.PIPE).communicate()

    # Every vhost name starts with '/'; header lines do not.
    return [row for row in output.split('\n') if re.findall('^/', row)]
571
572
def rabbitmq_cmd(cmd):

    ''' JSON formatted RabbitMQ command output '''

    supported_commands = ['status', 'cluster_status', 'list_hashes', 'list_ciphers']
    if cmd not in supported_commands:
        logger.error("Command is not supported yet, sorry")
        logger.error("Supported commands are: " + str(supported_commands))
        __context__['retcode'] = 2
        return False

    raw, _ = subprocess.Popen(['rabbitmqctl', cmd],
                              stdout=subprocess.PIPE).communicate()

    # Keep only the outermost erlang list ('[' .. ']'), flatten newlines,
    # then sanitize it into JSON.
    erl_payload = raw[int(raw.find('[')):int(raw.rfind(']'))+1].replace('\n','')
    return json.loads(_sanitize_rmqctl_output(erl_payload))
589
590
def rabbitmq_check(target='I@rabbitmq:server', target_type='compound', ignore_dead=False, **kwargs):

    ''' Verify rabbit cluster and it's alarms.

    A minion fails when the set of running nodes differs from the configured
    disc nodes, or when any running node reports a non-empty alarm list.
    '''

    agent = "RabbitMQ status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.rabbitmq_cmd',
                                    arg=['cluster_status'],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []

    for minion in out:
        rabbitmqctl_json = out[minion]['ret']
        running_nodes = []
        available_nodes = []
        alarms = []
        for el in rabbitmqctl_json:
            if 'alarms' in el:
                alarms = el['alarms']
            if 'nodes' in el:
                available_nodes = el['nodes'][0]['disc']
            if 'running_nodes' in el:
                running_nodes = el['running_nodes']

        # BUG FIX: list.sort() returns None, so the original comparison
        # 'running_nodes.sort() == available_nodes.sort()' was always
        # True (None == None) and missing/dead nodes were never detected.
        # Compare sorted copies instead.
        if sorted(running_nodes) == sorted(available_nodes):
            nodes_alarms = []
            for node in running_nodes:
                for el in alarms:
                    if node in el:
                        if len(el[node]) > 0:
                            nodes_alarms.append(el[node])
            if len(nodes_alarms) > 0:
                failed_minions.append(minion)
        else:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(running_nodes)
    return True
641
642
def haproxy_status(socket_path='/run/haproxy/admin.sock', buff_size=8192, encoding='UTF-8', stats_filter=None):

    ''' JSON formatted haproxy status.

    Sends 'show stat' to the haproxy admin socket and converts the CSV reply
    into a nested dict: service -> FRONTEND/BACKEND/UPSTREAM -> stats.

    :param socket_path: path to the haproxy admin unix socket
    :param buff_size: socket read chunk size in bytes
    :param encoding: text encoding used on the socket
    :param stats_filter: optional list of stat field names to keep;
                         None or an empty list keeps all fields
    '''

    # Mutable default [] replaced with the None sentinel (shared-default
    # anti-pattern); caller-visible behavior is unchanged.
    if stats_filter is None:
        stats_filter = []

    stat_cmd = 'show stat\n'

    if not os.path.exists(socket_path):
        logger.error('Socket %s does not exist or haproxy not running' % socket_path)
        __context__['retcode'] = 2
        return False

    client = socket.socket( socket.AF_UNIX, socket.SOCK_STREAM)
    client.connect(socket_path)

    client.send(bytearray(stat_cmd, encoding))
    output = client.recv(buff_size)

    res = ""
    while output:
        res += output.decode(encoding)
        output = client.recv(buff_size)
    client.close()

    haproxy_stats = {}
    res_list = res.split('\n')
    # First line is the CSV header, prefixed with '# '.
    fields = res_list[0][2:].split(',')
    stats_list = [line for line in res_list[1:] if len(line.strip()) > 0]

    for line in stats_list:
        # Map header fields onto this row's values in a single pass instead
        # of the original per-cell fields.index() lookups.
        element = dict(zip(fields, line.split(',')))
        server_name = element.pop('pxname')
        server_type = element.pop('svname')
        if stats_filter:
            element = dict((k, v) for k, v in element.items() if k in stats_filter)
        if server_name not in haproxy_stats:
            haproxy_stats[server_name] = {}
        if server_type == "FRONTEND" or server_type == "BACKEND":
            haproxy_stats[server_name][server_type] = element
        else:
            # Any other svname is an individual upstream server entry.
            if 'UPSTREAM' not in haproxy_stats[server_name]:
                haproxy_stats[server_name]['UPSTREAM'] = {}
            haproxy_stats[server_name]['UPSTREAM'][server_type] = element

    return haproxy_stats
697
698
def haproxy_check(target='I@haproxy:proxy', target_type='compound', ignore_dead=False, ignore_services=None, ignore_upstreams=None, ignore_no_upstream=False, **kwargs):

    ''' Verify haproxy backends status.

    :param ignore_services: service (pxname) names excluded from the check
    :param ignore_upstreams: upstream server names excluded from the check
    :param ignore_no_upstream: do not fail services without any UPSTREAM
    '''

    # Mutable default [] arguments replaced with None sentinels (shared
    # default anti-pattern); caller-visible behavior is unchanged.
    if ignore_services is None:
        ignore_services = []
    if ignore_upstreams is None:
        ignore_upstreams = []

    agent = "haproxy status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.haproxy_status',
                                    arg=["stats_filter=['status']"],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        haproxy_json = out[minion]['ret']
        for service in haproxy_json:
            if service not in ignore_services:
                # Frontend must accept connections and backend must be UP.
                if haproxy_json[service]['FRONTEND']['status'] != 'OPEN':
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                if haproxy_json[service]['BACKEND']['status'] != 'UP':
                    if minion not in failed_minions:
                        failed_minions.append(minion)
                if 'UPSTREAM' in haproxy_json[service]:
                    for upstream in haproxy_json[service]['UPSTREAM']:
                        if upstream not in ignore_upstreams:
                            if haproxy_json[service]['UPSTREAM'][upstream]['status'] != 'UP':
                                if minion not in failed_minions:
                                    failed_minions.append(minion)
                else:
                    # Service exposes no upstream servers at all.
                    if not ignore_no_upstream:
                        if minion not in failed_minions:
                            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
746
747
def df_check(target='*', target_type='glob', verify='space', space_limit=80, inode_limit=80, ignore_dead=False, ignore_partitions=None, **kwargs):

    ''' Verify storage space/inodes status.

    :param verify: 'space' checks disk.usage 'capacity', 'inodes' checks
                   disk.inodeusage 'use'
    :param space_limit: max allowed used-space percentage
    :param inode_limit: max allowed used-inodes percentage
    :param ignore_partitions: mount points excluded from the check
    '''

    # Mutable default [] replaced with the None sentinel (shared-default
    # anti-pattern); caller-visible behavior is unchanged.
    if ignore_partitions is None:
        ignore_partitions = []

    supported_options = ['space', 'inodes']
    if verify not in supported_options:
        logger.error('Unsupported "verify" option.')
        logger.error('Supported options are: %s' % str(supported_options))
        __context__['retcode'] = 2
        return False

    if verify == 'space':
        fun_cmd = 'disk.usage'
        json_arg = 'capacity'
        limit = space_limit
    elif verify == 'inodes':
        fun_cmd = 'disk.inodeusage'
        json_arg = 'use'
        limit = inode_limit

    agent = "df status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun=fun_cmd,
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        df_json = out[minion]['ret']
        for disk in df_json:
            if disk not in ignore_partitions:
                # Values look like '42%': strip the trailing percent sign.
                if int(df_json[disk][json_arg][:-1]) > int(limit):
                    if minion not in failed_minions:
                        failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
797
798
def load_check(target='*', target_type='glob', la1=3, la5=3, la15=3, ignore_dead=False, **kwargs):

    ''' Verify load average status '''

    agent = "load average status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.loadavg',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    # Each load-average window is compared against its own threshold.
    thresholds = (('1-min', la1), ('5-min', la5), ('15-min', la15))
    for minion in out:
        verified_minions.append(minion)
        la_json = out[minion]['ret']
        for window, limit in thresholds:
            if float(la_json[window]) > float(limit):
                if minion not in failed_minions:
                    failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
836
837
def netdev_check(target='*', target_type='glob', rx_drop_limit=0, tx_drop_limit=0, ignore_devices=None, ignore_dead=False, **kwargs):

    ''' Verify netdev rx/tx drop status.

    :param rx_drop_limit: max tolerated rx_drop counter value per device
    :param tx_drop_limit: max tolerated tx_drop counter value per device
    :param ignore_devices: interface names excluded from the check
    '''

    # Mutable default [] replaced with the None sentinel (shared-default
    # anti-pattern); caller-visible behavior is unchanged.
    if ignore_devices is None:
        ignore_devices = []

    agent = "netdev rx/tx status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='status.netdev',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    # Dict (not list) on purpose: maps minion -> device -> offending
    # counters so the failure report carries the actual drop values;
    # _failed_minions() iterates its keys just like a list.
    failed_minions = {}
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        dev_json = out[minion]['ret']
        for netdev in dev_json:
            if netdev in ignore_devices:
                continue
            # rx and tx drop counters get identical handling.
            for counter, limit in (('rx_drop', rx_drop_limit), ('tx_drop', tx_drop_limit)):
                drops = int(dev_json[netdev][counter])
                if drops > int(limit):
                    failed_minions.setdefault(minion, {}).setdefault(netdev, {})[counter] = drops

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
880
881
def mem_check(target='*', target_type='glob', used_limit=80, ignore_dead=False, **kwargs):

    ''' Verify available memory status '''

    agent = "available memory status"
    out = __salt__['saltutil.cmd'](
        tgt=target,
        tgt_type=target_type,
        fun='status.meminfo',
        timeout=3
    ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        meminfo = out[minion]['ret']
        mem_avail = int(meminfo['MemAvailable']['value'])
        mem_total = int(meminfo['MemTotal']['value'])
        # Expression kept verbatim to preserve the original division
        # semantics (integer division under python2).
        used_pct = float((mem_total - mem_avail) * 100 / mem_total)
        if used_pct > float(used_limit):
            if minion not in failed_minions:
                failed_minions.append(minion)
        else:
            verified_minions.append( { minion : str(used_pct) + '%' } )

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)
    return True
916
917
def ntp_status(params = ['-4', '-p', '-n']):

    ''' JSON formatted ntpq command output.

    Runs ``ntpq`` with the given CLI parameters (default: IPv4, peers
    billboard, numeric addresses) and parses the peers table into a dict
    keyed by peer address, with typed columns plus 'state' / 'current'
    annotations derived from the tally code.
    '''

    # Tally-code legend for the first character of each peers line.
    ntp_states = [
        { 'indicator': '#', 'comment': 'source selected, distance exceeds maximum value' },
        { 'indicator': 'o', 'comment': 'source selected, Pulse Per Second (PPS) used' },
        { 'indicator': '+', 'comment': 'source selected, included in final set' },
        { 'indicator': 'x', 'comment': 'source false ticker' },
        { 'indicator': '.', 'comment': 'source selected from end of candidate list' },
        { 'indicator': '-', 'comment': 'source discarded by cluster algorithm' },
        { 'indicator': '*', 'comment': 'current time source' },
        { 'indicator': ' ', 'comment': 'source discarded high stratum, failed sanity' }
    ]
    ntp_state_indicators = []
    for state in ntp_states:
        ntp_state_indicators.append(state['indicator'])
    # Legend for the 't' (source type) column.
    source_types = {}
    source_types['l'] = "local (such as a GPS, WWVB)"
    source_types['u'] = "unicast (most common)"
    source_types['m'] = "multicast"
    source_types['b'] = "broadcast"
    source_types['-'] = "netaddr"

    proc = subprocess.Popen(['ntpq'] + params, stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    ntp_lines = stdout.split('\n')
    # Header row supplies the column names; rename the terse 'st'/'t'
    # columns to self-describing keys.
    fields = re.sub("\s+", " ", ntp_lines[0]).split()
    fields[fields.index('st')] = 'stratum'
    fields[fields.index('t')] = 'source_type'

    ntp_peers = {}
    # Skip the two header lines; every remaining non-empty line is a peer.
    for line in ntp_lines[2:]:
        if len(line.strip()) > 0:
            element = {}
            values = re.sub("\s+", " ", line).split()
            for i in range(len(values)):
                if fields[i] == 'source_type':
                    element[fields[i]] = { 'indicator': values[i], 'comment': source_types[values[i]] }
                elif fields[i] in ['stratum', 'when', 'poll', 'reach']:
                    # '-' in these integer columns means unknown/never; map
                    # it to -1 so the value stays an int.
                    if values[i] == '-':
                        element[fields[i]] = int(-1)
                    else:
                        element[fields[i]] = int(values[i])
                elif fields[i] in ['delay', 'offset', 'jitter']:
                    element[fields[i]] = float(values[i])
                else:
                    element[fields[i]] = values[i]
            # First character of the 'remote' column is the tally code.
            peer = element.pop('remote')
            peer_state = peer[0]
            if peer_state in ntp_state_indicators:
                peer = peer[1:]
            else:
                # No recognizable tally code: mark the peer as failed ('f').
                peer_state = 'f'
            element['current'] = False
            if peer_state == '*':
                element['current'] = True
            for state in ntp_states:
                if state['indicator'] == peer_state:
                    element['state'] = state.copy()
                if peer_state == 'f' and state['indicator'] == ' ':
                    # Reuse the 'discarded' description, relabelled as 'f'.
                    fail_state = state.copy()
                    fail_state.pop('indicator')
                    fail_state['indicator'] = 'f'
                    element['state'] = fail_state
            ntp_peers[peer] = element

    return ntp_peers
987
988
def ntp_check(min_peers=1, max_stratum=3, target='*', target_type='glob', ignore_dead=False, **kwargs):

    ''' Verify NTP peers status.

    A minion passes when at least ``min_peers`` of its NTP peers report a
    stratum not exceeding ``max_stratum``.
    '''

    agent = "ntpd peers status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.ntp_status',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        peers = out[minion]['ret']
        # Peers whose stratum is acceptable (<= max_stratum).
        healthy = [p for p in peers if peers[p]['stratum'] <= int(max_stratum)]
        if len(healthy) >= int(min_peers):
            if minion not in verified_minions:
                verified_minions.append(minion)
        elif minion not in failed_minions:
            failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +02001027
1028
def gluster_pool_list():

    ''' JSON formatted GlusterFS pool list command output.

    Returns ``{uuid: {'hostname': ..., 'state': ...}}`` parsed from the
    three-column output of ``gluster pool list``.
    '''

    proc = subprocess.Popen(['gluster', 'pool', 'list'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    line_re = re.compile('^(\S+)\s+(\S+)\s+(\S+)$')
    lines = stdout.split('\n')
    # First line is the column header row.
    headers = line_re.findall(lines[0])[0]

    pool = {}

    for row in lines[1:]:
        stripped = row.strip()
        if not stripped:
            continue
        values = line_re.findall(stripped)[0]
        entry = dict((headers[col].lower(), values[col]) for col in range(len(headers)))
        pool[entry.pop('uuid')] = entry

    return pool
1051
1052
def gluster_volume_status():

    ''' JSON formatted GlusterFS volumes status command output.

    Parses ``gluster volume status all detail`` into
    ``{volume_name: {'bricks': [{...}, ...]}}``; every "Key : Value" line
    following a brick header is attached to the most recently seen brick.
    '''

    proc = subprocess.Popen(['gluster', 'volume', 'status', 'all', 'detail'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    # Line-oriented state machine over the human-readable CLI output.
    begin_volume = False     # inside a "Status of volume" section
    brick_lookup = False     # the next 'Brick' line starts a new brick entry
    volumes = {}
    volume_name = ""

    for line in stdout.split('\n'):
        if 'Status of volume' in line:
            volume_name = line.split(':')[1].strip()
            volumes[volume_name] = { 'bricks': [] }
            begin_volume = True
        elif len(line.strip()) == 0:
            # Blank line terminates the current volume section.
            if begin_volume:
                begin_volume = False
        elif '--------' in line:
            # Separator row precedes each brick block.
            brick_lookup = True
        elif brick_lookup and line.split(':')[0].strip() == 'Brick':
            # "Brick : host:/path" -> split into host and brick path.
            brick_host, brick_path = re.findall('^Brick\ *:\ (.*)', line)[0].split()[1].split(':')
            volumes[volume_name]['bricks'].append({ 'host': brick_host, 'path': brick_path })
            brick_lookup = False
        else:
            # Generic "Key : Value" detail line for the last appended brick.
            brick_key, brick_value = line.split(':')
            brick_key = brick_key.strip().lower().replace(' ', '_')
            brick_value = brick_value.strip()
            volumes[volume_name]['bricks'][len(volumes[volume_name]['bricks']) - 1][brick_key] = brick_value

    return volumes
1086
1087
def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):

    ''' Check GlusterFS peer status.

    A minion fails when any pool peer is not 'Connected' or when fewer than
    ``expected_size`` peers are connected.
    '''

    agent = "glusterfs peer status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.gluster_pool_list',
                                    timeout=3,
                                    kwargs='[batch=True]'
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        pool = out[minion]['ret']
        connected = [uuid for uuid in pool if pool[uuid]['state'] == 'Connected']
        # Any disconnected peer fails the minion.
        if len(connected) != len(pool):
            if minion not in failed_minions:
                failed_minions.append(minion)
        # The connected pool must be at least the expected cluster size.
        if len(connected) < expected_size:
            if minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1128
1129
def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=[], ignore_dead=False, **kwargs):

    ''' Check GlusterFS volumes status.

    For every non-ignored volume on every targeted minion, require that the
    volume reports at least ``expected_size`` bricks and that all of them are
    online; otherwise the minion is reported as failed.
    NOTE: ``ignore_volumes`` default is a shared mutable; it is never mutated
    here, so it is safe, but keep it read-only.
    '''

    agent = "glusterfs volumes status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.gluster_volume_status',
                                    timeout=3,
                                    kwargs='[batch=True]'
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    verified_volumes = []
    for minion in out:
        verified_minions.append(minion)
        gluster_json = out[minion]['ret']
        for volume in gluster_json:
            if volume in ignore_volumes:
                continue
            else:
                verified_volumes.append(volume)
            alive_bricks = 0
            if 'bricks' not in gluster_json[volume]:
                if minion not in failed_minions:
                    failed_minions.append(minion)
                # BUGFIX: no brick data at all for this volume -- skip it
                # instead of raising KeyError on the lookup below.
                continue
            bricks = gluster_json[volume]['bricks']
            if len(bricks) < expected_size:
                if minion not in failed_minions:
                    failed_minions.append(minion)
            for brick in bricks:
                if brick['online'] == 'Y':
                    alive_bricks += 1
                else:
                    if minion not in failed_minions:
                        failed_minions.append(minion)
            if alive_bricks < expected_size:
                if minion not in failed_minions:
                    failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info("Verified minions:")
        logger.info(verified_minions)
        logger.info("Verified volumes:")
        logger.info(verified_volumes)

    return True
1186
1187
def ceph_cmd(cmd):

    ''' JSON formatted ceph command output.

    Appends '--format json-pretty' so the CLI emits machine-readable JSON.
    '''

    args = ['ceph'] + cmd.split() + ['--format', 'json-pretty']
    output = subprocess.Popen(args, stdout=subprocess.PIPE).communicate()[0]

    return json.loads(output)
1196
1197
def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):

    ''' Check all ceph monitors health status.

    Runs ``ceph status`` on every targeted monitor and fails a minion when
    the overall status, OSD map flags/counts, quorum membership, per-monitor
    health, or placement-group states deviate from the expected values.
    NOTE(review): the JSON layout matches pre-Luminous ``ceph status``
    output (``health.overall_status`` etc.) -- confirm against the deployed
    Ceph release.
    '''

    agent = "ceph health status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.ceph_cmd',
                                    arg=['status'],
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []
    for minion in out:
        verified_minions.append(minion)
        ceph_json = out[minion]['ret']
        # NOTE(review): fsid is captured but never used afterwards.
        fsid = ceph_json['fsid']

        # Overall cluster health must match the expected status string.
        if ceph_json['health']['overall_status'] != expected_status:
            if minion not in failed_minions:
                failed_minions.append(minion)

        # Any 'full' / 'nearfull' OSD map flag fails the minion.
        if ceph_json['osdmap']['osdmap']['full']:
            if minion not in failed_minions:
                failed_minions.append(minion)

        if ceph_json['osdmap']['osdmap']['nearfull']:
            if minion not in failed_minions:
                failed_minions.append(minion)

        # Every known OSD must be both 'in' and 'up'.
        num_osds = ceph_json['osdmap']['osdmap']['num_osds']
        num_in_osds = ceph_json['osdmap']['osdmap']['num_in_osds']
        num_up_osds = ceph_json['osdmap']['osdmap']['num_up_osds']
        if not ( num_osds == num_in_osds == num_up_osds ):
            if minion not in failed_minions:
                failed_minions.append(minion)

        # Quorum must include every monitor present in the monmap.
        quorum = len(ceph_json['quorum'])
        quorum_names = len(ceph_json['quorum_names'])
        mons = len(ceph_json['monmap']['mons'])
        if not ( quorum == quorum_names == mons ):
            if minion not in failed_minions:
                failed_minions.append(minion)

        # Per-monitor clock-skew (timechecks) health.
        for mon in ceph_json['health']['timechecks']['mons']:
            if mon['health'] != expected_status:
                if minion not in failed_minions:
                    failed_minions.append(minion)

        # Per-service, per-monitor health entries.
        for srv in ceph_json['health']['health']['health_services']:
            for mon in srv['mons']:
                if mon['health'] != expected_status:
                    if minion not in failed_minions:
                        failed_minions.append(minion)

        # Every placement-group state bucket must match the expected state.
        for state in ceph_json['pgmap']['pgs_by_state']:
            if state['state_name'] != expected_state:
                if minion not in failed_minions:
                    failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        # NOTE(review): logs ceph_json of the last iterated minion only.
        logger.info("Quorum:")
        logger.info(ceph_json['quorum_names'])
        logger.info("Verified minions:")
        logger.info(verified_minions)

    return True
1274
1275
Dzmitry Stremkouski7cd10fc2019-04-17 11:51:59 +02001276def get_entropy():
1277
1278 ''' Retrieve entropy size for the host '''
1279
1280 with open('/proc/sys/kernel/random/entropy_avail', 'r') as f:
1281 entropy = f.read()
1282 return entropy
1283
1284
def entropy_check(target='*', target_type='glob', minimum_bits=700, ignore_dead=False, **kwargs):

    ''' Check entropy size in cluster.

    A minion fails when its available kernel entropy is below
    ``minimum_bits``.
    '''

    agent = "entropy size status"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.get_entropy',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    failed_minions = []
    verified_minions = []

    for minion in out:
        verified_minions.append(minion)
        if int(out[minion]['ret']) < minimum_bits:
            if minion not in failed_minions:
                failed_minions.append(minion)

    if not _failed_minions(out, agent, failed_minions):
        __context__['retcode'] = 2
        return False

    if kwargs.get("debug", False):
        logger.info(verified_minions)

    return True
1318
1319
Dzmitry Stremkouskif1bcbb52019-04-11 15:48:24 +02001320def docker_registry_list(host):
1321
1322 ''' Retrieve and list docker catalog '''
1323
1324 try:
1325 if host[0:4] == 'http':
1326 url = host + '/v2/'
1327 else:
1328 url = 'http://' + host + '/v2/'
1329 repos = requests.get(url + '_catalog')
1330
1331 versions = {}
1332 for repo in repos.json()['repositories']:
1333 repo_versions = requests.get(url + repo + '/tags/list')
1334 versions[repo] = repo_versions.json().pop('tags')
1335 return versions
1336 except:
1337 return {}
Dzmitry Stremkouski7cd10fc2019-04-17 11:51:59 +02001338
1339
def docker_ps(list_all=0):

    ''' List docker containers via the local docker socket.

    When ``list_all`` is truthy, stopped containers are included as well.
    '''

    import docker
    cli = docker.client.Client(base_url='unix://var/run/docker.sock')

    return cli.containers(all=list_all)
1345
Dzmitry Stremkouski2c709f22019-04-22 02:27:54 +02001346
def zookeeper_cmd(cmd, hostname='localhost', port=2181):

    ''' Execute zookeeper cmd via socket.

    Sends a ZooKeeper four-letter command over a plain TCP connection,
    half-closes the write side, and returns the full text response.
    '''

    chunk_size = 1024
    conn = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    conn.connect((hostname, port))
    conn.sendall(cmd)
    # Half-close to signal end-of-request; the server replies then closes.
    conn.shutdown(socket.SHUT_WR)
    response = ""
    while True:
        chunk = conn.recv(chunk_size)
        if chunk == "":
            break
        response += chunk
    conn.close()

    return response
1364
1365
def zookeeper_stats():

    ''' Retrieve zookeeper stats.

    Issues the 'conf', 'envi', 'srvr' and 'cons' four-letter commands and
    returns a dict with 'configuration', 'environment', 'server' and
    'clients' sections; numeric-looking values are converted to int.
    '''

    stats = {}
    # 'conf': configuration as key=value pairs.
    stats['configuration'] = {}
    for line in zookeeper_cmd('conf').split('\n'):
        if line:
            key, value = line.split('=')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['configuration'][key.strip().lower().replace(' ', '_')] = value

    # 'envi': first line is a banner, the rest are key=value pairs.
    stats['environment'] = {}
    for line in zookeeper_cmd('envi').split('\n')[1:]:
        if line:
            key, value = line.split('=')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['environment'][key.strip().lower().replace(' ', '_')] = value

    # 'srvr': version and latency lines need special parsing, everything
    # else is generic "Key: Value".
    stats['server'] = {}
    for line in zookeeper_cmd('srvr').split('\n'):
        if line:
            if re.findall('^Zookeeper version:', line, flags=re.IGNORECASE):
                version_str = line.split(':')[1].strip()
                version = version_str
                if '-' in version_str:
                    # Strip build suffix, e.g. "3.4.5-cdh5..." -> "3.4.5".
                    version_str = version_str.split('-')[0]
                if '.' in version_str:
                    # Dotted version becomes a list of ints, e.g. [3, 4, 5].
                    version = []
                    version_list = version_str.split('.')
                    for elem in version_list:
                        if elem.strip().isdigit():
                            version.append(int(elem))
                stats['server']['version'] = version
                continue
            if re.findall('^Latency min/avg/max:', line, flags=re.IGNORECASE):
                latency_min, latency_avg, latency_max = line.split(':')[1].strip().split('/')
                stats['server']['latency'] = {'min':int(latency_min),'max':int(latency_max),'avg':int(latency_avg)}
                continue
            key, value = line.split(':')
            if value.strip().isdigit():
                value = int(value)
            else:
                value = value.strip()
            stats['server'][key.strip().lower().replace(' ', '_')] = value

    # 'cons': one line per client connection, e.g.
    # " /10.0.0.1:53412[1](queued=0,recved=5,sent=5)".
    stats['clients'] = {}
    for line in zookeeper_cmd('cons').split('\n'):
        if line:
            clients = re.findall('^(\s*\/)(.+)(:\d+\[\d+\])(\(.+\))$', line)[0][1:]
            addr = clients[0]
            port, direction = re.findall('^(\d+)\[(\d+)\]$', clients[1][1:])[0]
            client = '['+addr+']:'+str(port)
            stats['clients'][client] = {'direction': int(direction)}
            # Parenthesized "(k=v,k=v,...)" stats for this connection.
            for elem in clients[2][1:-1].split(','):
                key, value = elem.split('=')
                if value.strip().isdigit():
                    value = int(value)
                else:
                    value = value.strip()
                stats['clients'][client][key.strip().lower().replace(' ', '_')] = value

    return stats
1435
1436
def get_zookeeper_leader(target='I@opencontrail:control', target_type='compound', ignore_dead=False, **kwargs):

    ''' Retrieve zookeeper leader.

    Returns the minion id whose ZooKeeper server reports mode 'leader',
    or None when no leader is found.
    '''

    agent = "zookeeper leader retrieve"
    out = __salt__['saltutil.cmd']( tgt=target,
                                    tgt_type=target_type,
                                    fun='health_checks.zookeeper_stats',
                                    timeout=3
                                  ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    leader = None
    for minion in out:
        if out[minion]['ret']['server']['mode'] == 'leader':
            leader = minion

    return leader
1460
1461
def contrail_vrouter_list(api_host='127.0.0.1', api_port=9100):

    ''' Retrieve and list contrail vrouters.
        Valid targets: Contrail controllers.

    Returns a list of {'name': ..., 'uuid': ...} dicts; on any API failure
    an empty dict is returned (kept for backward compatibility -- both
    values are falsy and empty when iterated).
    '''

    try:
        if api_host[0:4] == 'http':
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)

        vrouters = requests.get(url + '/virtual-routers').json()
        vrouter_list = []
        for vr in vrouters['virtual-routers']:
            vr_uuid = vr['uuid']
            vr_name = None
            for name in vr['fq_name']:
                if name == "default-global-system-config":
                    continue
                else:
                    vr_name = name
            if vr_name is None:
                # BUGFIX: fq_name held only the global-config entry; the old
                # code hit a NameError here that the bare except silently
                # turned into an empty result for the whole listing.
                continue
            vrouter_list.append({'name': vr_name, 'uuid': vr_uuid})
        return vrouter_list

    # BUGFIX: narrowed from a bare 'except:' that also hid coding errors.
    except Exception:
        return {}
1488
1489
def contrail_vrouter_show(vr_uuid, api_host='127.0.0.1', api_port=9100):

    ''' Retrieve contrail vrouter data
        Valid targets: Contrail controllers.

    Returns the decoded JSON document for the given virtual-router UUID,
    or an empty dict on any API/decoding failure (best-effort helper).
    '''

    try:
        if api_host[0:4] == 'http':
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)

        return requests.get(url + '/virtual-router/' + vr_uuid).json()

    # BUGFIX: narrowed from a bare 'except:' which also swallowed
    # SystemExit / KeyboardInterrupt and hid programming errors.
    except Exception:
        return {}
1506
1507
1508def _xmletree_descend_child(given_child, tag_requested):
1509
1510 ''' Returns xmletree subelement by tag name '''
1511
1512 my_child = {}
1513
1514 for child in given_child:
1515 if child.tag == tag_requested:
1516 my_child = child
1517 break
1518
1519 return my_child
1520
1521
def contrail_vrouter_agent_status(api_host='127.0.0.1', api_port=8085):

    ''' Retrieve contrail vrouter agent status.

    Queries the agent introspect endpoint (Snh_SandeshUVECacheReq) and
    returns a dict with the agent 'state' and its 'connections' list; on
    any failure a human-readable error string is returned instead.
    '''

    import xml.etree.ElementTree as ET

    if api_host[0:4] == 'http':
        url = api_host + ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    try:
        req = requests.get(url + '/Snh_SandeshUVECacheReq?x=NodeStatus')
        if int(req.status_code) != 200:
            return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))
    except Exception as e:
        # BUGFIX: the original did 'except: pass' and then crashed with
        # NameError on the undefined 'req'; report the failure instead.
        return "Could not fetch data from vrouter agent via %s.\n%s" % (url, str(e))

    try:
        xmletree = ET.fromstring(req.text)
    except Exception:
        return "Could not parse xml tree %s" % str(req.text)

    try:
        vrouter_data = {}
        # Descend to the ProcessStatus node of the NodeStatus UVE.
        child = _xmletree_descend_child(xmletree, 'NodeStatusUVE')
        child = _xmletree_descend_child(child, 'data')
        child = _xmletree_descend_child(child, 'NodeStatus')
        child = _xmletree_descend_child(child, 'process_status')
        child = _xmletree_descend_child(child, 'list')
        child = _xmletree_descend_child(child, 'ProcessStatus')
        vrouter_data['state'] = _xmletree_descend_child(child, 'state').text
        vrouter_data['connections'] = []
        child = _xmletree_descend_child(child, 'connection_infos')
        for elem in _xmletree_descend_child(child, 'list'):
            conn = {}
            conn['type'] = _xmletree_descend_child(elem,'type').text
            conn['name'] = _xmletree_descend_child(elem,'name').text
            conn['status'] = _xmletree_descend_child(elem,'status').text
            conn['description'] = _xmletree_descend_child(elem,'description').text
            conn['server_addrs'] = []
            server_addrs = _xmletree_descend_child(elem,'server_addrs')
            for srv in _xmletree_descend_child(server_addrs,'list'):
                host, port = srv.text.split(':')
                conn['server_addrs'].append({'host': host, 'port': port})
            vrouter_data['connections'].append(conn)
        return vrouter_data
    except Exception:
        return "Unsupported xml tree for this function %s" % str(req.text)
1571
1572
Dzmitry Stremkouski36290202019-05-05 21:26:25 +02001573def contrail_collector_agent_status(vr_name, api_host='auto', api_port=9081):
1574
1575 ''' Retrieve contrail vrouter agent status from analyticsdb '''
1576
1577 if api_host[0:4] == 'http':
1578 url = api_host + ':' + str(api_port)
1579 elif api_host == 'auto':
1580 my_ip = __salt__['pillar.get']('_param:opencontrail_analytics_address')
1581 url = 'http://' + my_ip+ ':' + str(api_port)
1582 else:
1583 url = 'http://' + api_host + ':' + str(api_port)
1584
1585 req = requests.get(url + '/analytics/uves/vrouter/' + vr_name + '?flat')
1586 if int(req.status_code) != 200:
1587 return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))
1588
1589 return json.loads(req.text)
1590
1591
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001592def contrail_control_peers_summary(api_host='auto', api_port=8083):
1593
1594 ''' Retrieve contrail control peers summary '''
1595
1596 import xml.etree.ElementTree as ET
1597
1598 if api_host[0:4] == 'http':
1599 url = api_host + ':' + str(api_port)
1600 elif api_host == 'auto':
1601 my_ip = '127.0.0.1'
1602 url = 'http://' + my_ip+ ':' + str(api_port)
1603 else:
1604 url = 'http://' + api_host + ':' + str(api_port)
1605
1606 req = requests.get(url + '/Snh_ShowBgpNeighborSummaryReq')
1607 if int(req.status_code) != 200:
1608 return "Could not fetch data from contrail control via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))
1609
1610 peers = []
1611 summary = req.text
1612
1613 try:
1614 xmletree = ET.fromstring(summary)
1615 for elem in xmletree.find('.//list'):
1616 attrs = {}
1617 for child in elem:
1618 attrs[child.tag] = child.text
1619 peers.append(attrs)
1620 except:
1621 return "Could not parse xml tree %s" % str(summary)
1622
1623 return peers
1624
1625
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001626def contrail_control_peer_status(api_host='auto', api_port=8083, fields=default_peer_filter):
1627
1628 ''' Contrail control peer status '''
1629
1630 peer_status = {}
1631
1632 for peer_elem in contrail_control_peers_summary():
1633 elem = {}
1634 for attr in peer_elem:
1635 if attr in fields:
1636 elem[attr] = peer_elem[attr]
1637
1638 peer_name = peer_elem["peer"]
1639 peer_status[peer_name] = elem
1640
1641 return peer_status
1642
1643
Dzmitry Stremkouski36290202019-05-05 21:26:25 +02001644def _get_object(json_obj, obj_path):
1645
1646 ''' Retrieve subelemet of an JSON object or value '''
1647
1648 if ':' in obj_path:
1649 splitter = obj_path.split(':')
1650 k = splitter[0]
1651 v = ':'.join(splitter[1:])
1652 if k.isdigit():
1653 # Return specific element path
1654 return [ _get_object(json_obj[int(k)], v) ]
1655 elif k == '*':
1656 l = []
1657 for el in json_obj:
1658 l.append(_get_object(el, v))
1659 # Return all list elements from the path
1660 return l
1661 else:
1662 # Contrail output may have nested JSON
1663 if isinstance(json_obj, str) or isinstance(json_obj, unicode):
1664 json_obj = json.loads(json_obj)
1665 # Assume list. Return it
1666 return { k: _get_object(json_obj[k], v) }
1667 else:
1668 return { obj_path: json_obj[obj_path] }
1669
1670
1671def _deepmerge(o1, o2):
1672
1673 ''' Deep merge JSON objects '''
1674
1675 o3 = {}
1676 if type(o1) == type(o2):
1677 if type(o1) == dict or type(o1) == tuple:
1678 for k in set(o1.keys() + o2.keys()):
1679 if k in o1:
1680 if k in o2:
1681 o3[k] = _deepmerge(o1[k], o2[k])
1682 else:
1683 o3[k] = o1[k]
1684 else:
1685 o3[k] = o2[k]
1686 elif type(o1) == list or type(o1) == set:
1687 o3 = [] + o2
1688 for el in o3:
1689 i = o3.index(el)
1690 o3[i] = _deepmerge(o1[i], o2[i])
1691 else:
1692 o3 = o2
1693 else:
1694 o3 = o2
1695
1696 return o3
1697
1698
def contrail_vrouter_agent_info(vr_name, filter_map=default_vrouter_info_map):

    ''' Retrieve filtered contrail vrouter agent info from analyticsdb.

    For every section/path in ``filter_map``, extracts the matching values
    from the vrouter UVE and deep-merges them into a single dict.
    '''

    agent_status = contrail_collector_agent_status(vr_name)
    info = {}
    for section in filter_map:
        info[section] = {}
        for path in filter_map[section]:
            info = _deepmerge(info, { section: _get_object(agent_status[section], path) })

    return info
1711
1712
Dzmitry Stremkouski480c84a2019-10-17 19:33:18 +02001713def contrail_mesh_check(target='I@opencontrail:control', target_type='compound', ignore_dead=False, strict=False, **kwargs):
1714
1715 ''' Check if contrail elements are connected to each other '''
1716
1717 agent = "contrail mesh check"
1718 out = __salt__['saltutil.cmd']( tgt=target,
1719 tgt_type=target_type,
1720 fun='health_checks.contrail_control_peer_status',
1721 timeout=3
1722 ) or None
1723
1724 if not _minions_output(out, agent, ignore_dead):
1725 __context__['retcode'] = 2
1726 return False
1727
1728
1729 minions = []
1730 for node in out:
1731 if strict:
1732 minions.append(node)
1733 else:
1734 minions.append(node.split('.')[0])
1735
1736 elements = {}
1737 for node in out:
1738 peer_elem = out[node]["ret"]
1739 for elem in peer_elem:
1740 if not strict:
1741 elem = elem.split('.')[0]
1742 if elem in elements:
1743 continue
1744 elements[elem] = {}
1745 elements[elem]["peers"] = []
1746 elements[elem]["my_address"] = peer_elem[elem]["peer_address"]
1747 if peer_elem[elem]["encoding"] == "XMPP":
1748 elements[elem]["type"] = "COMPUTE"
1749 elif peer_elem[elem]["encoding"] == "BGP":
1750 if elem in minions:
1751 elements[elem]["type"] = "CONTROLLER"
1752 else:
1753 elements[elem]["type"] = "EDGE-ROUTER"
1754
1755 for node in out:
1756 if strict:
1757 peer_name = node
1758 else:
1759 peer_name = node.split('.')[0]
1760 peer_elem = out[node]["ret"]
1761 for elem in peer_elem:
1762 if not strict:
1763 elem = elem.split('.')[0]
1764 peer_elem[elem]["peer"] = peer_name
1765 del(peer_elem[elem]["peer_address"])
1766 elements[elem]["peers"].append(peer_elem[elem])
1767
1768 failed_peers = []
1769 for elem_name in elements:
1770 elem = elements[elem_name]
1771 if elem["type"] == "COMPUTE":
1772 if len(elem["peers"]) < 2:
1773 if elem not in failed_peers:
1774 failed_peers.append(elem)
1775 if elem["type"] == "CONTROLLER":
1776 if len(elem["peers"]) < len(minions)-1:
1777 if elem not in failed_peers:
1778 failed_peers.append(elem)
1779 if elem["type"] == "EDGE-ROUTER":
1780 if not len(elem["peers"]) == len(minions):
1781 if elem not in failed_peers:
1782 failed_peers.append(elem)
1783 for peer in elem["peers"]:
1784 if not peer["state"] == "Established":
1785 if elem not in failed_peers:
1786 failed_peers.append(elem)
1787
1788 if len(failed_peers) > 0:
1789 logger.error("%s check FAILED" % agent)
1790 if strict:
1791 logger.error("Strict mode is on. Check DNS names in output")
1792 logger.error("Minions output:")
1793 logger.error(json.dumps(out, indent=4))
1794 else:
1795 logger.error("Failed peers:")
1796 logger.error(json.dumps(failed_peers, indent=4))
1797 __context__['retcode'] = 2
1798 return False
1799
1800 if kwargs.get("debug", False):
1801 logger.info(json.dumps(elements, indent=4))
1802
1803 return True
1804
1805
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001806def kafka_brokers_ids():
1807
1808 ''' Retrieve kafka brokers ids '''
1809
1810 brokers_ids = []
1811 for line in zookeeper_cmd('dump').split('\n'):
1812 if line:
1813 if '/brokers/ids/' in line:
1814 brokers_ids.append(int(line.split('/')[3]))
1815
1816 return brokers_ids
1817
1818
Dzmitry Stremkouski2c709f22019-04-22 02:27:54 +02001819def libvirt_capabilities():
1820
1821 ''' JSON formatted libvirtcapabilities list '''
1822
1823 import xml.etree.ElementTree as ET
1824
1825 try:
1826 proc = subprocess.Popen(['virsh', 'capabilities'], stdout=subprocess.PIPE)
1827 stdout, stderr = proc.communicate()
1828 xmletree = ET.fromstring(stdout)
1829 except:
1830 return "Could not parse xml tree %s" % str(stdout)
1831
1832 try:
1833 capabilities = {}
1834 for elem in xmletree:
1835 if elem.tag == "guest":
1836 for el in elem:
1837 if el.tag == 'arch':
1838 _name = el.attrib['name']
1839 capabilities[_name] = []
1840 for arch in el:
1841 if arch.tag == 'machine':
1842 if 'canonical' not in arch.attrib:
1843 capabilities[_name].append(arch.text)
1844
1845 return capabilities
1846 except:
1847 return "Unsupported xml tree for this function %s" % str(stdout)
1848
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001849
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001850def keystone_keys_attractor(keys_dir='/var/lib/keystone/fernet-keys', keys_ids=range(0,-4,-1)):
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001851
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001852 ''' JSON formatted dict of keystone keys sha256 sums '''
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001853
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001854 keys = os.listdir(keys_dir)
1855 keys.sort()
1856 keys_dict = {}
1857 try:
1858 for i in keys_ids:
1859 with open("%s/%s" % (keys_dir, str(keys[i])), 'r') as key_file:
1860 _iter1 = hashlib.sha256(key_file.read()).hexdigest()
1861 _iter2 = hashlib.sha256(_iter1).hexdigest()
1862 _iter3 = hashlib.sha256(_iter2).hexdigest()
1863 keys_dict[str(keys[i])] = _iter3
1864 except:
1865 pass
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001866
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001867 return keys_dict
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001868
Dzmitry Stremkouskia78a04d2019-07-13 11:05:03 +02001869
Dzmitry Stremkouski88275d32019-07-23 19:42:42 +02001870def keystone_keys_check(target='I@keystone:server', target_type='compound', ignore_dead=False, **kwargs):
1871
1872 ''' Check cluster keystone keys are in sync '''
1873
1874 keys_type = kwargs.get("keys_type", 'fernet')
1875
1876 supported_key_types = ['fernet', 'credential']
1877 if keys_type not in supported_key_types:
1878 logger.error("Unsupported keys type: %s" % str(keys_type))
1879 logger.error("Supported keys type are: %s" % str(supported_key_types))
1880 __context__['retcode'] = 2
1881 return False
1882
1883 agent = "keystone %s keys sync" % keys_type
1884 keys_dir_default = '/var/lib/keystone/%s-keys' % keys_type
1885 keys_dir = kwargs.get("keys_dir", keys_dir_default)
1886
1887 out = __salt__['saltutil.cmd']( tgt=target,
1888 tgt_type=target_type,
1889 fun='health_checks.keystone_keys_attractor',
1890 arg=["keys_dir='%s'" % keys_dir],
1891 timeout=3
1892 ) or None
1893
1894 if not _minions_output(out, agent, ignore_dead):
1895 __context__['retcode'] = 2
1896 return False
1897
1898 cluster_attractors = []
1899 failed_minions = []
1900 verified_minions = []
1901 attractor = {}
1902
1903 for minion in out:
1904 verified_minions.append(minion)
1905 attractor = out[minion]['ret']
1906 if attractor == {}:
1907 failed_minions.append(minion)
1908 if attractor not in cluster_attractors:
1909 cluster_attractors.append(attractor)
1910
1911 if not _failed_minions(out, agent, failed_minions):
1912 __context__['retcode'] = 2
1913 return False
1914
1915 if len(cluster_attractors) > 1:
1916 failed_minions = []
1917 for minion in out:
1918 failed_minions.append(minion)
1919
1920 if not _failed_minions(out, agent, failed_minions):
1921 __context__['retcode'] = 2
1922 return False
1923
1924 if kwargs.get("debug", False):
1925 logger.info("%s check done." % agent)
1926 logger.info(verified_minions)
1927
1928 return True
1929
Dzmitry Stremkouski83b9c162019-09-25 09:41:45 +02001930
def list_namespaces(raw_output=False):

    ''' JSON formatted ip netns dict.

    Returns a list of {'id': ..., 'uuid': ...} entries parsed from
    ``ip netns``; id is -1 when the kernel printed no netns id and -2 in
    raw mode (where the whole line is kept as the uuid).
    '''

    proc = subprocess.Popen(['ip', 'netns'], stdout=subprocess.PIPE)
    stdout, stderr = proc.communicate()

    namespaces = []

    for line in stdout.split('\n'):
        if not line:
            continue
        if raw_output:
            namespaces.append({'id': -2, 'uuid': line})
            continue
        tokens = line.split()
        if len(tokens) > 1:
            # Format: "<uuid> (id: <n>)" -- strip the trailing ')'.
            namespaces.append({'id': int(tokens[2][:-1]), 'uuid': tokens[0]})
        else:
            namespaces.append({'id': -1, 'uuid': line})

    return namespaces