[Tooling update] health_checks
* Added:
- Execute zookeeper cmds via socket
- Retrieve zookeeper stats
- Retrieve zookeeper leader
- Fetch contrail vrouter list from control node
- Fetch contrail vrouter summary from control node
- Fetch contrail vrouter agent connections from compute node
- Fetch libvirt supported machine types for compute node
- compound fix for contrail check
- failed_minion output fix for netdev check
Related-Prod: PROD-29236
Change-Id: I4ad5b053609bfbb1c072d9895d47117a62e18309
diff --git a/_modules/health_checks.py b/_modules/health_checks.py
index 3d1fa9a..ad43343 100644
--- a/_modules/health_checks.py
+++ b/_modules/health_checks.py
@@ -145,7 +145,7 @@
return True
-def contrail_check(target='I@contrail:control or I@contrail:collector or I@opencontrail:compute or I@opencontrail:client', target_type='compound', ignore_dead=False, **kwargs):
+def contrail_check(target='I@opencontrail:control or I@opencontrail:collector or I@opencontrail:compute', target_type='compound', ignore_dead=False, **kwargs):
''' Verify contrail status returns nothing critical '''
@@ -594,7 +594,7 @@
__context__['retcode'] = 2
return False
- failed_minions = []
+ failed_minions = {}
verified_minions = []
for minion in out:
verified_minions.append(minion)
@@ -603,10 +603,16 @@
if netdev not in ignore_devices:
if int(dev_json[netdev]['rx_drop']) > int(rx_drop_limit):
if minion not in failed_minions:
- failed_minions.append(minion)
+ failed_minions[minion] = {}
+ if netdev not in failed_minions[minion]:
+ failed_minions[minion][netdev] = {}
+ failed_minions[minion][netdev]['rx_drop'] = int(dev_json[netdev]['rx_drop'])
if int(dev_json[netdev]['tx_drop']) > int(tx_drop_limit):
if minion not in failed_minions:
- failed_minions.append(minion)
+ failed_minions[minion] = {}
+ if netdev not in failed_minions[minion]:
+ failed_minions[minion][netdev] = {}
+ failed_minions[minion][netdev]['tx_drop'] = int(dev_json[netdev]['tx_drop'])
if not _failed_minions(out, agent, failed_minions):
__context__['retcode'] = 2
@@ -1082,3 +1088,260 @@
client = docker.client.Client(base_url='unix://var/run/docker.sock')
return client.containers(all=list_all)
+
def zookeeper_cmd(cmd, hostname='localhost', port=2181):

    ''' Execute zookeeper cmd via socket

    Opens a TCP connection to ``hostname:port``, sends ``cmd``, half-closes
    the connection for writing and reads until the peer closes its side.

    :param cmd: command to send (text or bytes)
    :param hostname: host to connect to
    :param port: TCP port to connect to
    :return: everything the peer sent back, as ``str``
    :raises socket.error: when connecting, sending or receiving fails
    '''

    buf_size = 1024

    # Sockets carry bytes; encode text commands so the call works on both
    # Python 2 (where str is bytes) and Python 3.
    if not isinstance(cmd, bytes):
        cmd = cmd.encode('utf-8')

    chunks = []
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.connect((hostname, port))
        sock.sendall(cmd)
        # Half-close: tells the peer the request is complete.
        sock.shutdown(socket.SHUT_WR)
        while True:
            data = sock.recv(buf_size)
            # An empty read means the peer closed the connection.
            if not data:
                break
            chunks.append(data)
    finally:
        # Always release the descriptor, even when the exchange failed.
        sock.close()

    rdata = b"".join(chunks)
    # On Python 3 the payload is bytes; callers split it as text.
    if not isinstance(rdata, str):
        rdata = rdata.decode('utf-8', 'replace')
    return rdata
+
+
def _zk_normalize_key(raw):

    ''' Strip a key, lower-case it and replace spaces with underscores '''

    return raw.strip().lower().replace(' ', '_')


def _zk_coerce_value(raw):

    ''' Return an int when the stripped value is all digits, else the stripped string '''

    text = raw.strip()
    return int(text) if text.isdigit() else text


def _zk_parse_number(raw):

    ''' Parse a number as int when possible, falling back to float '''

    text = raw.strip()
    try:
        return int(text)
    except ValueError:
        return float(text)


def _zk_parse_pairs(lines, separator):

    ''' Parse "key<separator>value" lines into a dict.

    Only the first separator splits the line, so values may contain it.
    Lines without the separator (blank lines, headers) are skipped.
    '''

    parsed = {}
    for line in lines:
        key, found, value = line.partition(separator)
        if not found:
            continue
        parsed[_zk_normalize_key(key)] = _zk_coerce_value(value)
    return parsed


def _zk_parse_version(line):

    ''' Turn a "Zookeeper version: X.Y.Z-..." line into a list of ints.

    Falls back to the raw version string when it contains no dots.
    '''

    version_str = line.split(':')[1].strip()
    version = version_str
    if '-' in version_str:
        version_str = version_str.split('-')[0]
    if '.' in version_str:
        version = [int(elem) for elem in version_str.split('.')
                   if elem.strip().isdigit()]
    return version


def _zk_parse_server(lines):

    ''' Parse the lines of the "srvr" command output into a dict '''

    server = {}
    for line in lines:
        if not line:
            continue
        lowered = line.lower()
        if lowered.startswith('zookeeper version:'):
            server['version'] = _zk_parse_version(line)
            continue
        if lowered.startswith('latency min/avg/max:'):
            parts = line.split(':')[1].strip().split('/')
            if len(parts) == 3:
                try:
                    # The average may be fractional, hence the float fallback.
                    server['latency'] = {'min': _zk_parse_number(parts[0]),
                                         'max': _zk_parse_number(parts[2]),
                                         'avg': _zk_parse_number(parts[1])}
                    continue
                except ValueError:
                    # Not numeric: fall through and keep the raw text.
                    pass
        key, found, value = line.partition(':')
        if not found:
            # Free-text line (no "key: value" shape); nothing to record.
            continue
        server[_zk_normalize_key(key)] = _zk_coerce_value(value)
    return server


def _zk_parse_clients(lines):

    ''' Parse the lines of the "cons" command output into a dict.

    Expected line shape: " /<addr>:<port>[<n>](<key>=<value>,...)".
    Lines that do not match are skipped.
    '''

    clients = {}
    for line in lines:
        match = re.match(r'^\s*/(.+):(\d+)\[(\d+)\]\((.+)\)$', line)
        if not match:
            continue
        addr, port, direction, attrs = match.groups()
        client = '[' + addr + ']:' + port
        clients[client] = {'direction': int(direction)}
        for elem in attrs.split(','):
            key, found, value = elem.partition('=')
            if not found:
                continue
            clients[client][_zk_normalize_key(key)] = _zk_coerce_value(value)
    return clients


def zookeeper_stats():

    ''' Retrieve zookeeper stats

    Runs the "conf", "envi", "srvr" and "cons" commands through
    zookeeper_cmd() and parses each response.

    :return: dict with the keys 'configuration', 'environment', 'server'
             and 'clients'. Keys inside each section are lower-cased with
             spaces replaced by underscores; all-digit values become ints.
    '''

    stats = {}
    stats['configuration'] = _zk_parse_pairs(zookeeper_cmd('conf').split('\n'), '=')
    # The first line of the "envi" response is skipped.
    stats['environment'] = _zk_parse_pairs(zookeeper_cmd('envi').split('\n')[1:], '=')
    stats['server'] = _zk_parse_server(zookeeper_cmd('srvr').split('\n'))
    stats['clients'] = _zk_parse_clients(zookeeper_cmd('cons').split('\n'))
    return stats
+
+
def get_zookeeper_leader(target='I@opencontrail:control', target_type='compound', ignore_dead=False, **kwargs):

    ''' Retrieve zookeeper leader

    Runs health_checks.zookeeper_stats on the targeted minions and looks
    for the one whose server mode is 'leader'.

    :param target: salt target expression
    :param target_type: salt target type
    :param ignore_dead: passed through to _minions_output()
    :return: minion id of the leader, None when no minion reports leader
             mode, or False (with retcode 2) when _minions_output() rejects
             the output
    '''

    agent = "zookeeper leader retrieve"
    out = __salt__['saltutil.cmd'](tgt=target,
                                   tgt_type=target_type,
                                   fun='health_checks.zookeeper_stats',
                                   timeout=3
                                   ) or None

    if not _minions_output(out, agent, ignore_dead):
        __context__['retcode'] = 2
        return False

    leader = None
    for minion in out:
        try:
            zookeeper_mode = out[minion]['ret']['server']['mode']
        except (KeyError, TypeError):
            # This minion did not return a usable stats dict (for example an
            # error string); skip it rather than abort the whole lookup.
            continue

        if zookeeper_mode == 'leader':
            leader = minion

    return leader
+
+
def contrail_vrouter_list(api_host='127.0.0.1', api_port=9100):

    ''' Retrieve and list contrail vrouters.
        Valid targets: Contrail controllers.

    :param api_host: API host, with or without a leading http(s) scheme
    :param api_port: API port
    :return: list of {'name': ..., 'uuid': ...} dicts, or an empty dict
             when the request or the parsing of its response fails
    '''

    try:
        if api_host[0:4] == 'http':
            url = api_host + ':' + str(api_port)
        else:
            url = 'http://' + api_host + ':' + str(api_port)

        vrouters = requests.get(url + '/virtual-routers').json()
        vrouter_list = []
        for vr in vrouters['virtual-routers']:
            vr_uuid = vr['uuid']
            # Reset per vrouter so a name from the previous entry can never
            # leak into this one when fq_name holds no usable element.
            vr_name = None
            for name in vr['fq_name']:
                if name != "default-global-system-config":
                    vr_name = name
            vrouter_list.append({'name': vr_name, 'uuid': vr_uuid})
        return vrouter_list

    except Exception:
        # Best effort: any failure is reported as "no data".
        return {}
+
+
def contrail_vrouter_show(vr_uuid, api_host='127.0.0.1', api_port=9100):

    ''' Retrieve contrail vrouter data
        Valid targets: Contrail controllers.

    :param vr_uuid: uuid of the vrouter to fetch
    :param api_host: API host, with or without a leading http(s) scheme
    :param api_port: API port
    :return: decoded JSON response, or an empty dict when the request or
             the decoding fails
    '''

    try:
        scheme = '' if api_host[0:4] == 'http' else 'http://'
        url = scheme + api_host + ':' + str(api_port)
        return requests.get(url + '/virtual-router/' + vr_uuid).json()

    except Exception:
        # Best effort: any failure is reported as "no data".
        return {}
+
+
def _xmletree_descend_child(given_child, tag_requested):

    ''' Returns xmletree subelement by tag name

    Yields the first direct child of ``given_child`` whose tag equals
    ``tag_requested``; an empty dict is returned when there is none.
    '''

    matching = (node for node in given_child if node.tag == tag_requested)
    return next(matching, {})
+
+
def contrail_vrouter_agent_status(api_host='127.0.0.1', api_port=8085):

    ''' Retrieve contrail vrouter agent status

    Requests /Snh_SandeshUVECacheReq?x=NodeStatus from the given host and
    port and extracts the process state and its connection list from the
    XML response.

    :param api_host: agent host, with or without a leading http(s) scheme
    :param api_port: agent port
    :return: dict with 'state' and 'connections' on success; an error
             message string when the request fails, returns a non-200
             status, or the XML cannot be parsed or has an unexpected shape
    '''

    import xml.etree.ElementTree as ET

    if api_host[0:4] == 'http':
        url = api_host + ':' + str(api_port)
    else:
        url = 'http://' + api_host + ':' + str(api_port)

    try:
        req = requests.get(url + '/Snh_SandeshUVECacheReq?x=NodeStatus')
    except Exception as exc:
        # Without a response there is nothing to parse; report the failure
        # in the same string form used for the other error paths.
        return "Could not fetch data from vrouter agent via %s.\n%s" % (url, str(exc))

    if int(req.status_code) != 200:
        return "Could not fetch data from vrouter agent via %s.\nGot bad status code: %s\n%s" % (url, str(req.status_code), str(req.text))

    try:
        xmletree = ET.fromstring(req.text)
    except Exception:
        return "Could not parse xml tree %s" % str(req.text)

    try:
        vrouter_data = {}
        # Walk down to the first ProcessStatus element.
        child = xmletree
        for tag in ('NodeStatusUVE', 'data', 'NodeStatus',
                    'process_status', 'list', 'ProcessStatus'):
            child = _xmletree_descend_child(child, tag)
        vrouter_data['state'] = _xmletree_descend_child(child, 'state').text
        vrouter_data['connections'] = []
        child = _xmletree_descend_child(child, 'connection_infos')
        for elem in _xmletree_descend_child(child, 'list'):
            conn = {}
            for field in ('type', 'name', 'status', 'description'):
                conn[field] = _xmletree_descend_child(elem, field).text
            conn['server_addrs'] = []
            server_addrs = _xmletree_descend_child(elem, 'server_addrs')
            for srv in _xmletree_descend_child(server_addrs, 'list'):
                # Split on the last colon so a host containing colons
                # still yields exactly (host, port).
                host, port = srv.text.rsplit(':', 1)
                conn['server_addrs'].append({'host': host, 'port': port})
            vrouter_data['connections'].append(conn)
        return vrouter_data
    except Exception:
        return "Unsupported xml tree for this function %s" % str(req.text)
+
+
def libvirt_capabilities():

    ''' JSON formatted libvirtcapabilities list

    Runs ``virsh capabilities`` and collects, per guest architecture name,
    the text of every <machine> element that has no 'canonical' attribute.

    :return: dict mapping arch name to a list of machine type strings, or
             an error message string when virsh cannot be started or its
             output cannot be parsed
    '''

    import xml.etree.ElementTree as ET

    try:
        proc = subprocess.Popen(['virsh', 'capabilities'], stdout=subprocess.PIPE)
        stdout = proc.communicate()[0]
    except OSError as exc:
        # virsh is missing or not executable: there is no output to show.
        return "Could not run virsh capabilities: %s" % str(exc)

    try:
        xmletree = ET.fromstring(stdout)
    except Exception:
        return "Could not parse xml tree %s" % str(stdout)

    try:
        capabilities = {}
        for elem in xmletree:
            if elem.tag != "guest":
                continue
            for el in elem:
                if el.tag != 'arch':
                    continue
                _name = el.attrib['name']
                capabilities[_name] = [
                    arch.text for arch in el
                    if arch.tag == 'machine' and 'canonical' not in arch.attrib
                ]

        return capabilities
    except Exception:
        return "Unsupported xml tree for this function %s" % str(stdout)
+