[Tooling update] health_checks

* Added:

- RabbitMQ queue listing for a node
- Ceph health status check
- Ability to execute arbitrary Ceph commands
- Retrieve Docker registry repositories and their tags
- Retrieve GlusterFS pool list
- Retrieve GlusterFS volumes status
- Check GlusterFS pool health and size
- Check GlusterFS volumes bricks health and size
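
Example invocations (a sketch; the minion target, vhost and registry host
below are illustrative and not part of this change; the check functions are
intended to be run on the Salt master node):

    salt-call health_checks.gluster_pool_check expected_size=3
    salt-call health_checks.ceph_health_check
    salt 'msg01*' health_checks.rabbitmq_list_queues vhost='/openstack'
    salt-call health_checks.docker_registry_list host=registry.local:5000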

Related-Prod: PROD-29236

Change-Id: I1b12fe39d2d4f190db3cc68a6fe18f919f044eda
diff --git a/_modules/health_checks.py b/_modules/health_checks.py
index c321149..4245d03 100644
--- a/_modules/health_checks.py
+++ b/_modules/health_checks.py
@@ -1,3 +1,4 @@
+import requests
 import subprocess
 import socket
 import salt.utils
@@ -282,6 +283,37 @@
     return new_rabbitctl_json
 
 
+def rabbitmq_list_queues(vhost='/'):
+
+    ''' JSON formatted RabbitMQ queues list '''
+
+    proc = subprocess.Popen(['rabbitmqctl', 'list_queues', '-p', vhost], stdout=subprocess.PIPE)
+    stdout, stderr = proc.communicate()
+
+    queues = {}
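+    # "rabbitmqctl list_queues" prints "<queue>\t<messages>" rows plus a
+    # "Listing queues ..." header; keep only rows that end in a digit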
+    for line in stdout.split('\n'):
+        if re.findall('[0-9]$', line):
+            queue_name, num = re.sub(r"\s+", " ", line).split()
+            queues[queue_name] = int(num)
+
+    return queues
+
+
+def rabbitmq_list_vhosts():
+
+    ''' JSON formatted RabbitMQ vhosts list '''
+
+    proc = subprocess.Popen(['rabbitmqctl', 'list_vhosts'], stdout=subprocess.PIPE)
+    stdout, stderr = proc.communicate()
+
+    vhosts = []
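+    # keep only vhost names starting with "/"; this also skips the
+    # "Listing vhosts ..." header printed by rabbitmqctl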
+    for line in stdout.split('\n'):
+        if re.findall('^/', line):
+            vhosts.append(line)
+
+    return vhosts
+
+
 def rabbitmq_cmd(cmd):
 
     ''' JSON formatted RabbitMQ command output '''
@@ -728,4 +760,272 @@
 
     if kwargs.get("debug", False):
         logger.info(verified_minions)
+
     return True
+
+
+def gluster_pool_list():
+
+    ''' JSON formatted GlusterFS pool list command output '''
+
+    proc = subprocess.Popen(['gluster', 'pool', 'list'], stdout=subprocess.PIPE)
+    stdout, stderr = proc.communicate()
+
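+    # "gluster pool list" prints a whitespace-separated table; the first
+    # line is the header (UUID, Hostname, State) and supplies the key names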
+    regex = re.compile(r'^(\S+)\s+(\S+)\s+(\S+)$')
+    fields = regex.findall(stdout.split('\n')[0])[0]
+
+    pool = {}
+
+    for line in stdout.split('\n')[1:]:
+        if len(line.strip()) > 0:
+            peer = {}
+            values = regex.findall(line.strip())[0]
+            for i in range(len(fields)):
+                peer[fields[i].lower()] = values[i]
+            uuid = peer.pop('uuid')
+            pool[uuid] = peer
+
+    return pool
+
+
+def gluster_volume_status():
+
+    ''' JSON formatted GlusterFS volumes status command output '''
+
+    proc = subprocess.Popen(['gluster', 'volume', 'status', 'all', 'detail'], stdout=subprocess.PIPE)
+    stdout, stderr = proc.communicate()
+
+    begin_volume = False
+    brick_lookup = False
+    volumes = {}
+    volume_name = ""
+
+    for line in stdout.split('\n'):
+        if 'Status of volume' in line:
+            volume_name = line.split(':')[1].strip()
+            volumes[volume_name] = { 'bricks': [] }
+            begin_volume = True
+        elif len(line.strip()) == 0:
+            if begin_volume:
+                begin_volume = False
+        elif '--------' in line:
+            brick_lookup = True
+        elif brick_lookup and line.split(':')[0].strip() == 'Brick':
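+            # the detail output repeats the word "Brick" in the value
+            # ("Brick : Brick <host>:<path>"), hence split()[1]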
+            brick_host, brick_path = re.findall('^Brick\ *:\ (.*)', line)[0].split()[1].split(':')
+            volumes[volume_name]['bricks'].append({ 'host': brick_host, 'path': brick_path })
+            brick_lookup = False
+        else:
+            brick_key, brick_value = line.split(':', 1)
+            brick_key = brick_key.strip().lower().replace(' ', '_')
+            brick_value = brick_value.strip()
+            volumes[volume_name]['bricks'][len(volumes[volume_name]['bricks']) - 1][brick_key] = brick_value
+
+    return volumes
+
+
+def gluster_pool_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_dead=False, **kwargs):
+
+    ''' Check GlusterFS peer status '''
+
+    agent = "glusterfs peer status"
+    out = __salt__['saltutil.cmd']( tgt=target,
+                                    tgt_type=target_type,
+                                    fun='health_checks.gluster_pool_list',
+                                    timeout=3,
+                                    kwargs='[batch=True]'
+                                  ) or None
+
+    if not _minions_output(out, agent, ignore_dead):
+        __context__['retcode'] = 2
+        return False
+
+    failed_minions = []
+    verified_minions = []
+    for minion in out:
+        verified_minions.append(minion)
+        gluster_json = out[minion]['ret']
+        alive_peers = []
+        for peer in gluster_json:
+            if gluster_json[peer]['state'] == 'Connected':
+                alive_peers.append(peer)
+            else:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+        if len(alive_peers) < expected_size:
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+    if not _failed_minions(out, agent, failed_minions):
+        __context__['retcode'] = 2
+        return False
+
+    if kwargs.get("debug", False):
+        logger.info(verified_minions)
+
+    return True
+
+
+def gluster_volumes_check(target='I@glusterfs:server', target_type='compound', expected_size=3, ignore_volumes=[], ignore_dead=False, **kwargs):
+
+    ''' Check GlusterFS volumes status '''
+
+    agent = "glusterfs volumes status"
+    out = __salt__['saltutil.cmd']( tgt=target,
+                                    tgt_type=target_type,
+                                    fun='health_checks.gluster_volume_status',
+                                    timeout=3,
+                                    kwargs='[batch=True]'
+                                  ) or None
+
+    if not _minions_output(out, agent, ignore_dead):
+        __context__['retcode'] = 2
+        return False
+
+    failed_minions = []
+    verified_minions = []
+    verified_volumes = []
+    for minion in out:
+        verified_minions.append(minion)
+        gluster_json = out[minion]['ret']
+        for volume in gluster_json:
+            if volume in ignore_volumes:
+                continue
+            else:
+                verified_volumes.append(volume)
+            alive_bricks = 0
+            if 'bricks' not in gluster_json[volume]:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+                continue
+            bricks = gluster_json[volume]['bricks']
+            if len(bricks) < expected_size:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+            for brick in bricks:
+                if brick['online'] == 'Y':
+                    alive_bricks += 1
+                else:
+                    if minion not in failed_minions:
+                        failed_minions.append(minion)
+            if alive_bricks < expected_size:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+
+    if not _failed_minions(out, agent, failed_minions):
+        __context__['retcode'] = 2
+        return False
+
+    if kwargs.get("debug", False):
+        logger.info("Verified minions:")
+        logger.info(verified_minions)
+        logger.info("Verified volumes:")
+        logger.info(verified_volumes)
+
+    return True
+
+
+def ceph_cmd(cmd):
+
+    ''' JSON formatted ceph command output '''
+
+    proc = subprocess.Popen(['ceph'] + cmd.split() + ['--format', 'json-pretty'], stdout=subprocess.PIPE)
+    stdout, stderr = proc.communicate()
+
+    return json.loads(stdout)
+
+
+def ceph_health_check(target='I@ceph:mon', target_type='compound', expected_status='HEALTH_OK', expected_state='active+clean', ignore_dead=False, **kwargs):
+
+    ''' Check all ceph monitors health status '''
+
+    agent = "ceph health status"
+    out = __salt__['saltutil.cmd']( tgt=target,
+                                    tgt_type=target_type,
+                                    fun='health_checks.ceph_cmd',
+                                    arg=['status'],
+                                    timeout=3
+                                  ) or None
+
+    if not _minions_output(out, agent, ignore_dead):
+        __context__['retcode'] = 2
+        return False
+
+    failed_minions = []
+    verified_minions = []
+    for minion in out:
+        verified_minions.append(minion)
+        ceph_json = out[minion]['ret']
+        fsid = ceph_json['fsid']
+
+        if ceph_json['health']['overall_status'] != expected_status:
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+        if ceph_json['osdmap']['osdmap']['full']:
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+        if ceph_json['osdmap']['osdmap']['nearfull']:
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+        num_osds = ceph_json['osdmap']['osdmap']['num_osds']
+        num_in_osds = ceph_json['osdmap']['osdmap']['num_in_osds']
+        num_up_osds = ceph_json['osdmap']['osdmap']['num_up_osds']
+        if not ( num_osds == num_in_osds == num_up_osds ):
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+        quorum = len(ceph_json['quorum'])
+        quorum_names = len(ceph_json['quorum_names'])
+        mons = len(ceph_json['monmap']['mons'])
+        if not ( quorum == quorum_names == mons ):
+            if minion not in failed_minions:
+                failed_minions.append(minion)
+
+        for mon in ceph_json['health']['timechecks']['mons']:
+            if mon['health'] != expected_status:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+
+        for srv in ceph_json['health']['health']['health_services']:
+            for mon in srv['mons']:
+                if mon['health'] != expected_status:
+                    if minion not in failed_minions:
+                        failed_minions.append(minion)
+
+        for state in ceph_json['pgmap']['pgs_by_state']:
+            if state['state_name'] != expected_state:
+                if minion not in failed_minions:
+                    failed_minions.append(minion)
+
+    if not _failed_minions(out, agent, failed_minions):
+        __context__['retcode'] = 2
+        return False
+
+    if kwargs.get("debug", False):
+        logger.info("Quorum:")
+        logger.info(ceph_json['quorum_names'])
+        logger.info("Verified minions:")
+        logger.info(verified_minions)
+
+    return True
+
+
+def docker_registry_list(host):
+
+    ''' Retrieve and list docker catalog '''
+
+    try:
+        if host.startswith('http'):
+            url = host + '/v2/'
+        else:
+            url = 'http://' + host + '/v2/'
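+        # Docker Registry HTTP API v2: GET /v2/_catalog lists repositories,
+        # GET /v2/<name>/tags/list lists the tags of each repository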
+        repos = requests.get(url + '_catalog')
+
+        versions = {}
+        for repo in repos.json()['repositories']:
+            repo_versions = requests.get(url + repo + '/tags/list')
+            versions[repo] = repo_versions.json().pop('tags')
+        return versions
+    except Exception:
+        return {}