Implement VM analyzer for looking up duplicated, misplaced and lost VMs
RELATED-PROD: PROD-35666

Change-Id: Ie0063bd7a6bbd2c8e276c2bdbd95be1cf1eeddcf
diff --git a/scripts/vm_tracker/README b/scripts/vm_tracker/README
new file mode 100644
index 0000000..5e3152d
--- /dev/null
+++ b/scripts/vm_tracker/README
@@ -0,0 +1,13 @@
+VM tracking tool
+
+Q: What does it do?
+A: Searches for VMs that are duplicated (VMs with the same ID on different hypervisors), VMs that are misplaced (running on a different hypervisor than what Nova expects), and VMs that are lost (existing in libvirt without a UUID)
+
+Q: How does it work?
+A: By comparing the output of Nova (nova list --all) and virsh (virsh list --all, virsh list --all --uuid)
+
+Q: How do I use it?
+A: Run "collect_data.sh" to gather the data from Nova and libvirt, then run "analyze.py" to get the results.
+
+Q: What does it need to run?
+A: Salt access, bash on the compute node, and a correct hypervisor name pattern set in the analyze.py (check comments in the source before running it).
diff --git a/scripts/vm_tracker/analyze.py b/scripts/vm_tracker/analyze.py
new file mode 100644
index 0000000..51adbb6
--- /dev/null
+++ b/scripts/vm_tracker/analyze.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+import json
+def lookup_near(ls, name):
+    """Return the stored 'id' of the first VM dict in ls equal to name, else False."""
+    matching_ids = (entry['id'] for entry in ls
+                    if entry['id'] == name)
+    return next(matching_ids, False)
+def lookup_far(dc, name):
+    """List every hypervisor key of dc whose VM list contains a VM with id name."""
+    found_on = []
+    for hv_name, hv_vms in dc.items():
+        if lookup_near(hv_vms, name):
+            found_on.append(hv_name)
+    return found_on
+# Per hypervisor: "<name>:<state>" strings for rows that came without a UUID column.
+lost_vms = {}
+# Per hypervisor: [{"id": <uuid>, "state": <state>}, ...] as listed in virsh_vms.
+hypervisors = {}
+hypervisor_pattern = "cmp" #Replace with your own pattern, ensure it's unique so it wouldn't mix up with VM names
+skip_pattern = "------------"
+current_hv = ""
+vm_pattern = "-"
+with open("virsh_vms", "rt") as f:
+    for line in f:
+        line = line.replace("\n", "")
+        if skip_pattern in line:
+            continue
+        if hypervisor_pattern in line:
+            # Presumably the "<minion>:" header salt prints before each node's output.
+            current_hv = line.replace(":", "")
+            if current_hv in hypervisors:
+                print("Duplicate hypervisor %s, exiting" % current_hv)
+                raise SystemExit(1)  # actually stop instead of analysing partial data
+            hypervisors[current_hv] = []
+        elif vm_pattern in line:
+            if not current_hv:
+                print("Malformed virsh list, exiting")
+                raise SystemExit(1)  # a VM row appeared before any hypervisor header
+            vm_info_struct = line.replace("shut off", "shutoff").split()
+            if len(vm_info_struct) == 4:
+                iid, virsh_id, iname, state = vm_info_struct
+                hypervisors[current_hv].append({"id": iid, "state": state})
+            elif len(vm_info_struct) == 3: #No UUID assigned
+                virsh_id, iname, state = vm_info_struct
+                # setdefault replaces the Python-2-only dict.has_key() test.
+                lost_vms.setdefault(current_hv, []).append(iname + ":" + state)
+# Nova's view: hypervisor hostname -> list of {"id", "name", "state"} dicts.
+nova_out = ""
+nova_vms = {}
+with open("nova_vms", "rt") as f:
+    for line in f.readlines():
+        # Only lines that mention both "servers" and "RESP BODY" are parsed.
+        if "servers" not in line or "RESP BODY" not in line:
+            continue
+        nova_out = line.replace("RESP BODY: ", "").replace("\n", "")
+        nova_vms_json = json.loads(nova_out)
+        for vm in nova_vms_json['servers']:
+            # Read the fields in a fixed order before touching nova_vms.
+            entry = {"id": vm['id'], "name": vm['OS-EXT-SRV-ATTR:instance_name']}
+            vm_hv = vm['OS-EXT-SRV-ATTR:hypervisor_hostname']
+            entry["state"] = vm['OS-EXT-STS:vm_state']
+            nova_vms.setdefault(vm_hv, []).append(entry)
+# Reverse index: VM id -> ["<hypervisor>(<state>)", ...] for every place virsh listed it.
+rev = {}
+# Ids found on more than one hypervisor.
+lsdup = []
+for hv in hypervisors:
+    for vm in hypervisors[hv]:
+        rev.setdefault(vm['id'], []).append("%s(%s)" % (hv, vm['state']))
+for vm_id in rev:
+    if len(rev[vm_id]) > 1:
+        # print() with one pre-formatted argument behaves the same on Python 2 and 3.
+        print("Duplicate VM: %s on %s" % (vm_id, rev[vm_id]))
+        lsdup.append(vm_id)
+for hv in hypervisors:
+    if hv not in nova_vms and len(hypervisors[hv]) > 0:
+        # Nova lists no VMs on this hypervisor; only VMs nova has nowhere are reported here.
+        for vm in hypervisors[hv]:
+            if not lookup_far(nova_vms, vm["id"]):
+                print("Nova doesn't know that vm %s is running on %s" % (vm["id"], hv))
+        continue
+    for vm in hypervisors[hv]:
+        # Nothing to report when nova agrees, or when the id is already in the duplicate list.
+        if lookup_near(nova_vms[hv], vm['id']) or vm['id'] in lsdup:
+            continue
+        report = "WARN: VM %s is on hypervisor %s" % (vm['id'], hv)
+        # Ask nova where (if anywhere) it places this VM.
+        nova_hvs = lookup_far(nova_vms, vm["id"])
+        if nova_hvs:
+            report += ", but nova thinks it is running on %s." % (str(nova_hvs))
+        else:
+            report += ", but nova doesn't know about it."
+        report += " VM state is %s " % vm['state']
+        # print() with one pre-formatted argument behaves the same on Python 2 and 3.
+        print(report)
+if lost_vms:
+    print("Lost VMs report (existing in virsh without an UUID and completely untracked in Openstack)")
+for lost_hv, lost_entries in lost_vms.items():
+    print(lost_hv + ":")
+    for lost_entry in lost_entries:  # one line per VM recorded for this hypervisor
+        print(lost_entry)
diff --git a/scripts/vm_tracker/collect_data.sh b/scripts/vm_tracker/collect_data.sh
new file mode 100644
index 0000000..59566d5
--- /dev/null
+++ b/scripts/vm_tracker/collect_data.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+echo "Gathering the nova data, this may take a while"
+sudo salt -C "ctl01*" cmd.run ". /root/keystonercv3; nova --debug list --all --limit -1" > nova_vms  # --debug output is kept: analyze.py scans nova_vms for "RESP BODY" lines
+echo "Gathering the virsh data, this may take even longer"
+sudo salt -t 10 -C "cmp*" cmd.run 'bash -c "paste <(virsh list --all --uuid) <(virsh list --all | grep instance)"' > virsh_vms  # pastes the UUID list side by side with the "virsh list --all" rows containing "instance"; "cmp*" should agree with hypervisor_pattern in analyze.py