Implement VM analyzer for looking up duplicated, misplaced and lost VMs
RELATED-PROD: PROD-35666
Change-Id: Ie0063bd7a6bbd2c8e276c2bdbd95be1cf1eeddcf
diff --git a/scripts/lbaas_integration.sh b/scripts/lbaas_integration.sh
new file mode 100644
index 0000000..cdf439a
--- /dev/null
+++ b/scripts/lbaas_integration.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+
+# A simple integration test that checks whether the LBaaS 2.0 integration is working on your cloud.
+# May require some minor tuning depending on the LB provider (e.g. specifying a flavor, order of listener/pool creation, etc.).
+# Requires a project, a permissive security group, a tenant network, a floating network, a keypair and an image/flavor to start the VMs.
+
+# Testing of the load balancing itself is still manual: start the test in screen, wait until the LB is up, then test it from another tab.
+IMAGE=6d989e06-493f-4f2d-9d07-81650fd04581 # environment-specific IDs below: edit all of them before running
+FLAVOR=dfa1ecac-e8a9-408a-b93c-75077272e10e
+NETWORK=5435a882-32f0-4abf-b1f6-3f942f85c3e7 # tenant network the pool VMs attach to
+FIP_NET=ee84f06b-be00-4295-a27a-4c6653e960ef # external network the floating IP is allocated from
+KEY=qa-lb-kp
+SGID=008f8c20-133c-4ff8-9ae0-5d8de301b3e7 # the permissive security group; must allow ${PROTO}/${PORT}
+PROTO=TCP
+PORT=22
+LBNAME=qa-testlb
+echo "Spawning the pool VMs"
+openstack server create lb-1 --image ${IMAGE} --flavor ${FLAVOR} --key ${KEY} --nic net-id=${NETWORK} --security-group ${SGID}
+openstack server create lb-2 --image ${IMAGE} --flavor ${FLAVOR} --key ${KEY} --nic net-id=${NETWORK} --security-group ${SGID}
+openstack server create lb-3 --image ${IMAGE} --flavor ${FLAVOR} --key ${KEY} --nic net-id=${NETWORK} --security-group ${SGID}
+echo "Obtaining the service subnet ID"
+SUBNET=`openstack network show ${NETWORK} | grep subnets | awk '{print $4}'` # NOTE(review): assumes the network has exactly one subnet -- verify
+echo "Creating the load balancer"
+neutron lbaas-loadbalancer-create --name ${LBNAME} ${SUBNET}
+LB=`neutron lbaas-loadbalancer-list | grep ${LBNAME} | awk '{print $2}'`
+echo "Obtaining the LB VIP port and adding the permissive security group"
+LB_PORT=`neutron lbaas-loadbalancer-show ${LB} | grep 'port_id' | awk '{print $4}'`
+neutron port-update ${LB_PORT} --no-security-groups # drop the default groups from the VIP port first...
+neutron port-update ${LB_PORT} --security-group ${SGID} # ...then attach only the permissive one
+echo "Creating the LB listener and pool"
+LISTENER=`neutron lbaas-listener-create --loadbalancer ${LB} --protocol ${PROTO} --protocol-port ${PORT} --name ${LBNAME}-listener | grep '\ id' | awk '{print $4}'` # grep '\ id' matches the bare "id" row, not e.g. "tenant_id"
+POOL=`neutron lbaas-pool-create --listener ${LISTENER} --protocol ${PROTO} --name ${LBNAME}-pool --lb-algorithm ROUND_ROBIN | grep '\ id' | awk '{print $4}'`
+echo "Obtaining the fixed IP addresses for the pool VMs"
+IP1=`openstack server show lb-1 | grep addresses | awk '{print $4}' | cut -d '=' -f 2`
+IP2=`openstack server show lb-2 | grep addresses | awk '{print $4}' | cut -d '=' -f 2`
+IP3=`openstack server show lb-3 | grep addresses | awk '{print $4}' | cut -d '=' -f 2`
+echo "Adding the VMs to the pool"
+neutron lbaas-member-create --protocol-port ${PORT} ${POOL} --subnet ${SUBNET} --address ${IP1}
+neutron lbaas-member-create --protocol-port ${PORT} ${POOL} --subnet ${SUBNET} --address ${IP2}
+neutron lbaas-member-create --protocol-port ${PORT} ${POOL} --subnet ${SUBNET} --address ${IP3}
+echo "Associating the floating IP with the LB VIP"
+FIP_ID=`neutron floatingip-create ${FIP_NET} | grep '\ id' | awk '{print $4}'`
+neutron floatingip-associate ${FIP_ID} ${LB_PORT}
+FIP_IP=`neutron floatingip-show ${FIP_ID} | grep address | awk '{print $4}'`
+echo Load balancer is up with the floating IP ${FIP_IP}:${PORT}, protocol ${PROTO}. Press Enter for LB decommissioning.
+read _ # pause here: test the LB manually before the teardown below runs
+neutron lbaas-member-list ${POOL} | grep ${SUBNET} | awk -v pool=${POOL} '{system("neutron lbaas-member-delete " $2 " " pool)}' # members must be deleted before the pool can go
+neutron lbaas-pool-delete ${POOL}
+neutron lbaas-listener-delete ${LISTENER}
+neutron lbaas-loadbalancer-delete ${LB}
+neutron floatingip-delete ${FIP_ID}
+openstack server delete lb-1
+openstack server delete lb-2
+openstack server delete lb-3
diff --git a/scripts/vm_tracker/README b/scripts/vm_tracker/README
new file mode 100644
index 0000000..5e3152d
--- /dev/null
+++ b/scripts/vm_tracker/README
@@ -0,0 +1,13 @@
+VM tracking tool
+
+Q: What does it do?
+A: Searches for VMs that are duplicated (VMs with the same ID on different hypervisors), VMs that are misplaced (running on a different hypervisor than the one Nova expects), and VMs that are lost (existing in libvirt without a UUID)
+
+Q: How does it work?
+A: By comparing the output of Nova (nova list --all) and virsh (virsh list --all, virsh list --all --uuid)
+
+Q: How do I use it?
+A: Run "collect_data.sh" to gather the data from Nova and libvirt, then run "analyze.py" to get the results.
+
+Q: What does it need to run?
+A: Salt access, bash on the compute nodes, and a correct hypervisor name pattern set in analyze.py (check the comments in the source before running it).
diff --git a/scripts/vm_tracker/analyze.py b/scripts/vm_tracker/analyze.py
new file mode 100644
index 0000000..51adbb6
--- /dev/null
+++ b/scripts/vm_tracker/analyze.py
@@ -0,0 +1,101 @@
+#!/usr/bin/python
+import json
+def lookup_near(ls, name):  # membership test: does one hypervisor's VM list contain uuid `name`?
+    for vm in ls:  # ls: list of {"id": ..., "state": ...} dicts for a single hypervisor
+        if vm['id'] == name:
+            return vm['id']  # the uuid itself (always truthy)
+    return False  # falsy marker: not found on this hypervisor
+def lookup_far(dc, name):  # return every hypervisor in dc whose VM list contains uuid `name`
+    result_hvs = []
+    for hv in dc:  # dc: {hypervisor name: [vm dicts]}
+        res = lookup_near(dc[hv], name)
+        if res:
+            result_hvs.append(hv)
+    return result_hvs  # empty list (falsy) when the uuid is unknown everywhere
+lost_vms = {}  # hv name -> ["instance_name:state", ...] for domains virsh lists without a UUID column
+hypervisors = {}  # hv name -> [{"id": uuid, "state": state}, ...] parsed from the virsh dump
+hypervisor_pattern = "cmp" #Replace with your own pattern; it must be unique so it cannot be mistaken for part of a VM line
+skip_pattern = "------------"  # separator row of the virsh table output
+current_hv = ""
+vm_pattern = "-"  # any remaining line containing "-" is treated as a VM row (UUIDs contain dashes)
+with open("virsh_vms", "rt") as f:  # produced by collect_data.sh
+    for line in f.readlines():
+        line = line.replace("\n", "")
+        if skip_pattern in line:
+            continue  # table separator, ignore
+        elif hypervisor_pattern in line:
+            current_hv = line.replace(":", "")  # salt prefixes each minion's output with "hostname:"
+            if current_hv in hypervisors:
+                print("Duplicate hypervisor %s, exiting" % current_hv)
+                break
+            else:
+                hypervisors[current_hv] = []
+        elif vm_pattern in line:
+            if not current_hv:  # VM row before any hypervisor header: input is broken
+                print("Malformed virsh list, exiting")
+                break
+            vm_info_struct = [x for x in line.replace("\n", "").replace("\t"," ").replace("shut off", "shutoff").split(" ") if x]  # collapse "shut off" so the state stays a single token
+            if len(vm_info_struct) == 4:  # uuid, virsh id, instance name, state
+                iid, virsh_id, iname, state = vm_info_struct
+                hypervisors[current_hv].append({"id": iid, "state": state})
+            elif len(vm_info_struct) == 3: #No UUID column was pasted in: the domain is "lost"
+                virsh_id, iname, state = vm_info_struct
+                if not lost_vms.has_key(current_hv):  # NOTE: has_key makes this script Python 2 only
+                    lost_vms[current_hv] = [iname + ":" + state]
+                else:
+                    lost_vms[current_hv].append(iname + ":" + state)
+nova_out = ""
+nova_vms = {}  # hv name -> [{"id":, "name":, "state":}] as reported by nova
+with open("nova_vms", "rt") as f:  # "nova --debug list" output captured by collect_data.sh
+    for line in f.readlines():
+        if "servers" in line:
+            if "RESP BODY" in line:
+                nova_out = line.replace("RESP BODY: ", "").replace("\n", "")  # NOTE(review): only the LAST matching body is kept -- verify pagination is not in play
+nova_vms_json = json.loads(nova_out)
+for vm in nova_vms_json['servers']:
+    vm_id = vm['id']
+    vm_iname = vm['OS-EXT-SRV-ATTR:instance_name']
+    vm_hv = vm['OS-EXT-SRV-ATTR:hypervisor_hostname']
+    vm_state = vm['OS-EXT-STS:vm_state']
+    if vm_hv not in nova_vms:
+        nova_vms[vm_hv] = []
+    nova_vms[vm_hv].append({"id": vm_id, "name": vm_iname, "state": vm_state})
+rev = {}  # uuid -> ["hv(state)", ...]: reverse index over the virsh data
+lsdup = []  # uuids found on more than one hypervisor (reported once here, skipped later)
+for hv in hypervisors:
+    for vm in hypervisors[hv]:
+        if not vm['id'] in rev:
+            rev[vm['id']] = [hv+"(%s)"%vm['state']]
+        else:
+            rev[vm['id']].append(hv+"(%s)"%vm['state'])
+for vm_id in rev:
+    if len(rev[vm_id]) > 1:  # same uuid seen on several hypervisors == duplicate VM
+        print "Duplicate VM: %s on %s" % (vm_id, rev[vm_id])
+        lsdup.append(vm_id)
+for hv in hypervisors:
+    if hv not in nova_vms and len(hypervisors[hv]) > 0:  # whole hypervisor unknown to nova
+        #print "WARN: hypervisor %s exists but nova doesn't know that it has following VMs:" % hv
+        for vm in hypervisors[hv]:
+            if not lookup_far(nova_vms, vm["id"]):  # and the VM is not placed anywhere else by nova either
+                print "Nova doesn't know that vm %s is running on %s" %(vm["id"], hv)
+        continue  # skip the per-VM comparison below: nova_vms[hv] would KeyError
+    for vm in hypervisors[hv]:
+        report = ""
+        if not lookup_near(nova_vms[hv], vm['id']):  # virsh has it here, nova does not
+            if vm['id'] in lsdup:
+                continue  # already reported in the duplicates pass above
+            report += "WARN: VM %s is on hypervisor %s" % (vm['id'], hv)
+            nova_hvs = lookup_far(nova_vms, vm["id"])
+            if nova_hvs:  # nova places it on some other hypervisor(s): misplaced VM
+                report += ", but nova thinks it is running on %s." % (str(nova_hvs))
+            else:
+                report += ", but nova doesn't know about it."
+            report += " VM state is %s " % vm['state']
+        if report:
+            print(report)
+if lost_vms:
+    print("Lost VMs report (existing in virsh without an UUID and completely untracked in Openstack)")
+for hv in lost_vms:
+    print(hv+":")
+    for vm in lost_vms[hv]:
+        print(vm)
diff --git a/scripts/vm_tracker/collect_data.sh b/scripts/vm_tracker/collect_data.sh
new file mode 100644
index 0000000..59566d5
--- /dev/null
+++ b/scripts/vm_tracker/collect_data.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+echo "Gathering the nova data, this may take a while"
+sudo salt -C "ctl01*" cmd.run ". /root/keystonercv3; nova --debug list --all --limit -1" > nova_vms # --debug exposes the raw JSON RESP BODY that analyze.py parses
+echo "Gathering the virsh data, this may take even longer"
+sudo salt -t 10 -C "cmp*" cmd.run 'bash -c "paste <(virsh list --all --uuid) <(virsh list --all | grep instance)"' > virsh_vms # NOTE(review): assumes the --uuid lines align row-for-row with the grep'd table rows -- verify