Merge "Add verification of Calico cluster state after upgrade"
diff --git a/k8s-upgrade-pipeline.groovy b/k8s-upgrade-pipeline.groovy
index ef1c145..1c168f3 100644
--- a/k8s-upgrade-pipeline.groovy
+++ b/k8s-upgrade-pipeline.groovy
@@ -283,14 +283,141 @@
)['return'][0].values()[0].replaceAll('Salt command execution success','').trim().toBoolean()
}
-def checkCalicoUpgradeSuccessful(pepperEnv, target) {
+def calicoEnabled(pepperEnv, target) {
+ // Reads the 'kubernetes:pool:network:calico:enabled' pillar for target and converts the first returned value to a boolean
+ def pillarReply = new com.mirantis.mk.Salt().getPillar(pepperEnv, target, "kubernetes:pool:network:calico:enabled")
+ return pillarReply["return"][0].values()[0].toBoolean()
+}
+
+// Verifies Calico state after an upgrade: calicoctl versions and node/BGP peer status on 'target', and the calico-kube-controllers
+// pod via the first CTL_TARGET minion. Findings are reported with common.warningMsg and appended to currentBuild.description.
+def checkCalicoClusterState(pepperEnv, target) {
+ def common = new com.mirantis.mk.Common()
def salt = new com.mirantis.mk.Salt()
- stage("Checking cluster state after Calico upgrade") {
- // TODO add auto-check of results
- salt.cmdRun(pepperEnv, target, "calicoctl version | grep -i version")
- salt.cmdRun(pepperEnv, target, "calicoctl node status")
- salt.cmdRun(pepperEnv, target, "calicoctl node checksystem")
+ stage("Checking Calico cluster state after upgrade") {
+ // check Calico cluster and cli clients versions: each must be identical in every result and look like v3.x.y
+ def checkVer = [
+ "Client Version:": [verStr: "", dif: false, wrong: false],
+ "Cluster Version:": [verStr: "", dif: false, wrong: false]
+ ]
+ def checkVerPassed = true
+ def versionResults = salt.cmdRun(pepperEnv, target, "calicoctl version | grep -i version")['return'][0]
+ versionResults.each { k, v ->
+ // v is one result entry's command output; each of its lines is matched against the version labels in checkVer
+ for (verLine in v.split("\n")) {
+ for (verType in checkVer.keySet()) {
+ if (verLine.contains(verType)) {
+ def verRec = checkVer[verType]
+ def ver = (verLine - verType).trim()
+ if (!verRec.verStr) {
+ verRec.verStr = ver
+ }
+ if (verRec.verStr != ver) {
+ verRec.dif = true
+ checkVerPassed = false
+ }
+ def verParts = ver.tokenize(".")
+ if ((verParts.size() < 3) || (verParts[0] != "v3")) {
+ verRec.wrong = true
+ checkVerPassed = false
+ }
+ }
+ }
+ }
+ }
+ if (checkVerPassed) {
+ common.infoMsg("Calico version verification passed")
+ }
+ else {
+ def warningMsg = "Calico version verification failed.\n"
+ checkVer.each { k, rec ->
+ if (rec.dif) {
+ warningMsg += "${k} versions are different across nodes.\n"
+ }
+ if (rec.wrong) {
+ warningMsg += "${k} (some) versions are wrong - should be v3.x.\n"
+ }
+ }
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ }
+
+ // check Calico nodes' statuses: the process must be running and every listed BGP peer must be up/Established
+ def nodeStatusResults = salt.cmdRun(pepperEnv, target, "calicoctl node status")['return'][0]
+ def nodesRunning = true
+ def peersNotFound = []
+ def peersNotOnline = []
+ nodeStatusResults.each { k, v ->
+ if (!v.contains("Calico process is running")) {
+ nodesRunning = false
+ def warningMsg = "Node ${k}: Calico node is not running."
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ }
+ def nodePeersFound = false
+ def nodePeersOnline = true
+ for (nodeLine in v.split("\n")) {
+ if (nodeLine.contains("|") && (!nodeLine.contains("STATE"))) {
+ def col = nodeLine.tokenize("|").collect{it.trim()}
+ if (col.size() == 5) {
+ nodePeersFound = true
+ if ((col[2] != "up") || (col[4] != "Established")) {
+ def warningMsg = "Node ${k}: BGP peer '${col[0]}' is out of reach. Peer state: '${col[2]}', connection info: '${col[4]}'."
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ nodePeersOnline = false
+ }
+ }
+ }
+ }
+ if (!nodePeersFound) {
+ peersNotFound += k
+ }
+ if (!nodePeersOnline) {
+ peersNotOnline += k
+ }
+ }
+ if (nodesRunning) {
+ common.infoMsg("All the Calico nodes are running")
+ }
+ if (peersNotFound) {
+ def warningMsg = "BGP peers not found for the node(s): " + peersNotFound.join(', ') + "."
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ } else {
+ common.infoMsg("BGP peers were found for all the nodes")
+ }
+ if (!peersNotOnline) {
+ common.infoMsg("All reported BGP peers are reachable")
+ }
+
+ // check that 'calico-kube-controllers' is running
+ // one CTL node will be used to get pod's state using kubectl
+ def ctl_node = salt.getMinionsSorted(pepperEnv, CTL_TARGET)[0]
+ def kubeCtrlResult = salt.cmdRun(
+ pepperEnv, ctl_node, "kubectl get pod -n kube-system --selector=k8s-app=calico-kube-controllers"
+ )['return'][0].values()[0].toString()
+ if (kubeCtrlResult.contains("calico-kube-controllers")) {
+ for (line in kubeCtrlResult.split("\n")) {
+ if (line.contains("calico-kube-controllers")) {
+ def podCols = line.tokenize(" ")
+ if ((podCols[1] != "1/1") || (podCols[2] != "Running")) {
+ def warningMsg = "Calico kube-controllers pod is not running properly."
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ }
+ else {
+ common.infoMsg("Calico kube-controllers pod is running.")
+ }
+ break
+ }
+ }
+ } else {
+ def warningMsg = "Calico kube-controllers pod was not scheduled."
+ common.warningMsg(warningMsg)
+ currentBuild.description = (currentBuild.description ?: "") + "<br><b>${warningMsg}</b><br><br>"
+ }
}
}
@@ -500,9 +627,7 @@
startCalicoUpgrade(pepperEnv, ctl_node)
performCalicoConfigurationUpdateAndServicesRestart(pepperEnv, POOL, ctl_node)
completeCalicoUpgrade(pepperEnv, ctl_node)
- // after that no downtime is expected
-
- checkCalicoUpgradeSuccessful(pepperEnv, POOL)
+ // no downtime is expected after this point
}
/*
@@ -565,6 +690,11 @@
}
}
+ def ctl_node = salt.getMinionsSorted(pepperEnv, CTL_TARGET)[0]
+ if (calicoEnabled(pepperEnv, ctl_node)) {
+ checkCalicoClusterState(pepperEnv, POOL)
+ }
+
if (CONFORMANCE_RUN_AFTER.toBoolean()) {
def target = CTL_TARGET
def mcp_repo = ARTIFACTORY_URL