Merge "Update kube-addon-manager to latest upstream version"
diff --git a/kubernetes/files/kube-addon-manager/kube-addons.sh b/kubernetes/files/kube-addon-manager/kube-addons.sh
index 2ee28c4..8aefacc 100644
--- a/kubernetes/files/kube-addon-manager/kube-addons.sh
+++ b/kubernetes/files/kube-addon-manager/kube-addons.sh
@@ -31,6 +31,29 @@
 # managed result is of that. Start everything below that directory.
 KUBECTL=${KUBECTL_BIN:-/usr/bin/kubectl}
 KUBECTL_OPTS=${KUBECTL_OPTS:-}
+# KUBECTL_PRUNE_WHITELIST is a list of resources whitelisted by
+# default.
+# This is currently the same as the default in:
+# https://github.com/kubernetes/kubernetes/blob/master/pkg/kubectl/cmd/apply.go
+KUBECTL_PRUNE_WHITELIST=(
+  core/v1/ConfigMap
+  core/v1/Endpoints
+  core/v1/Namespace
+  core/v1/PersistentVolumeClaim
+  core/v1/PersistentVolume
+  core/v1/Pod
+  core/v1/ReplicationController
+  core/v1/Secret
+  core/v1/Service
+  batch/v1/Job
+  batch/v1beta1/CronJob
+  extensions/v1beta1/DaemonSet
+  extensions/v1beta1/Deployment
+  extensions/v1beta1/Ingress
+  extensions/v1beta1/ReplicaSet
+  apps/v1beta1/StatefulSet
+  apps/v1beta1/Deployment
+)
 
 ADDON_CHECK_INTERVAL_SEC=${TEST_ADDON_CHECK_INTERVAL_SEC:-60}
 ADDON_PATH=${ADDON_PATH:-/etc/kubernetes/addons}
@@ -47,6 +70,11 @@
 # will be reconciled for now.
 CLUSTER_SERVICE_LABEL="kubernetes.io/cluster-service"
 
+# Whether only one addon manager should be running in a multi-master setup.
+# Disabling this flag will force all addon managers to assume they are the
+# leaders.
+ADDON_MANAGER_LEADER_ELECTION=${ADDON_MANAGER_LEADER_ELECTION:-false}
+
 # Remember that you can't log from functions that print some output (because
 # logs are also printed on stdout).
 # $1 level
@@ -80,57 +108,61 @@
   esac
 }
 
+# Print one "--prune-whitelist <resource> " flag on stdout per argument ($1..$n).
+function generate_prune_whitelist_flags() {
+  local resource
+  for resource in "$@"; do
+    printf "%s" "--prune-whitelist ${resource} "
+  done
+}
+
+# KUBECTL_EXTRA_PRUNE_WHITELIST is a list of extra whitelisted resources
+# besides the default ones.
+extra_prune_whitelist=
+if [ -n "${KUBECTL_EXTRA_PRUNE_WHITELIST:-}" ]; then
+  extra_prune_whitelist=( ${KUBECTL_EXTRA_PRUNE_WHITELIST:-} )
+fi
+prune_whitelist=( ${KUBECTL_PRUNE_WHITELIST[@]}  ${extra_prune_whitelist[@]} )
+prune_whitelist_flags=$(generate_prune_whitelist_flags ${prune_whitelist[@]})
+
+log INFO "== Generated kubectl prune whitelist flags: $prune_whitelist_flags =="
+
 # $1 filename of addon to start.
 # $2 count of tries to start the addon.
 # $3 delay in seconds between two consecutive tries
+# $4 namespace
 function start_addon() {
   local -r addon_filename=$1;
   local -r tries=$2;
   local -r delay=$3;
+  local -r namespace=$4
 
-  create_resource_from_string "$(cat ${addon_filename})" "${tries}" "${delay}" "${addon_filename}"
+  create_resource_from_string "$(cat ${addon_filename})" "${tries}" "${delay}" "${addon_filename}" "${namespace}"
 }
 
 # $1 string with json or yaml.
 # $2 count of tries to start the addon.
 # $3 delay in seconds between two consecutive tries
 # $4 name of this object to use when logging about it.
+# $5 namespace for this object
 function create_resource_from_string() {
   local -r config_string=$1;
   local tries=$2;
   local -r delay=$3;
   local -r config_name=$4;
+  local -r namespace=$5;
   while [ ${tries} -gt 0 ]; do
-    echo "${config_string}" | ${KUBECTL} ${KUBECTL_OPTS} apply -f - && \
-      log INFO "== Successfully started ${config_name} at $(date -Is)" && \
+    echo "${config_string}" | ${KUBECTL} ${KUBECTL_OPTS} --namespace="${namespace}" apply -f - && \
+      log INFO "== Successfully started ${config_name} in namespace ${namespace} at $(date -Is)" && \
       return 0;
     let tries=tries-1;
-    log WRN "== Failed to start ${config_name} at $(date -Is). ${tries} tries remaining. =="
+    log WRN "== Failed to start ${config_name} in namespace ${namespace} at $(date -Is). ${tries} tries remaining. =="
     sleep ${delay};
   done
   return 1;
 }
 
-# $1 resource type.
-function annotate_addons() {
-  local -r obj_type=$1;
-
-  # Annotate to objects already have this annotation should fail.
-  # Only try once for now.
-  ${KUBECTL} ${KUBECTL_OPTS} annotate ${obj_type} -l ${CLUSTER_SERVICE_LABEL}=true \
-    kubectl.kubernetes.io/last-applied-configuration='' --overwrite=false
-
-  if [[ $? -eq 0 ]]; then
-    log INFO "== Annotate resources completed successfully at $(date -Is) =="
-  else
-    log WRN "== Annotate resources completed with errors at $(date -Is) =="
-  fi
-}
-
-# $1 enable --prune or not.
 function reconcile_addons() {
-  local -r enable_prune=$1;
-
   # TODO: Remove the first command in future release.
   # Adding this for backward compatibility. Old addons have CLUSTER_SERVICE_LABEL=true and don't have
   # ADDON_MANAGER_LABEL=EnsureExists will still be reconciled.
@@ -139,12 +171,12 @@
   log INFO "== Reconciling with deprecated label =="
   ${KUBECTL} ${KUBECTL_OPTS} apply -f ${ADDON_PATH} \
     -l ${CLUSTER_SERVICE_LABEL}=true,${ADDON_MANAGER_LABEL}!=EnsureExists \
-    --prune=${enable_prune} --recursive | grep -v configured
+    --prune=true ${prune_whitelist_flags} --recursive | grep -v configured
 
   log INFO "== Reconciling with addon-manager label =="
   ${KUBECTL} ${KUBECTL_OPTS} apply -f ${ADDON_PATH} \
     -l ${CLUSTER_SERVICE_LABEL}!=true,${ADDON_MANAGER_LABEL}=Reconcile \
-    --prune=${enable_prune} --recursive | grep -v configured
+    --prune=true ${prune_whitelist_flags} --recursive | grep -v configured
 
   log INFO "== Kubernetes addon reconcile completed at $(date -Is) =="
 }
@@ -158,24 +190,46 @@
   log INFO "== Kubernetes addon ensure completed at $(date -Is) =="
 }
 
+function is_leader() {
+  # Returns 0 when this addon manager should act. In a multi-master setup only
+  # one should run, so we reuse the kube-controller-manager leader election
+  # instead of implementing a separate mechanism here.
+  if ! $ADDON_MANAGER_LEADER_ELECTION; then
+    log INFO "Leader election disabled."
+    return 0;
+  fi
+  KUBE_CONTROLLER_MANAGER_LEADER=$(${KUBECTL} ${KUBECTL_OPTS} -n kube-system get ep kube-controller-manager \
+    -o go-template=$'{{index .metadata.annotations "control-plane.alpha.kubernetes.io/leader"}}' \
+    | sed -n 's/^.*"holderIdentity":"\([^"]*\)".*/\1/p' | awk -F'_' '{print $1}')
+  # sed -n ... p prints nothing unless a holderIdentity is found, so any failure
+  # to read the election result leaves the variable empty. It is better to have
+  # several addon managers than none, so an empty result means we act as leader.
+  log INFO "Leader is $KUBE_CONTROLLER_MANAGER_LEADER"
+  [[ "$KUBE_CONTROLLER_MANAGER_LEADER" == "" ||
+     "$HOSTNAME" == "$KUBE_CONTROLLER_MANAGER_LEADER" ]]
+}
+
 # The business logic for whether a given object should be created
 # was already enforced by salt, and /etc/kubernetes/addons is the
 # managed result is of that. Start everything below that directory.
 log INFO "== Kubernetes addon manager started at $(date -Is) with ADDON_CHECK_INTERVAL_SEC=${ADDON_CHECK_INTERVAL_SEC} =="
 
-## Wait for the default service account to be created in the kube-system namespace.
-#token_found=""
-#while [ -z "${token_found}" ]; do
-#  sleep .5
-#  token_found=$(${KUBECTL} ${KUBECTL_OPTS} get --namespace="${SYSTEM_NAMESPACE}" serviceaccount default -o go-template="{{with index .secrets 0}}{{.name}}{{end}}")
-#  if [[ $? -ne 0 ]]; then
-#    token_found="";
-#    log WRN "== Error getting default service account, retry in 0.5 second =="
-#  fi
-#done
-#
-#log INFO "== Default service account in the ${SYSTEM_NAMESPACE} namespace has token ${token_found} =="
-#
+# Create the namespace that will be used to host the cluster-level add-ons.
+start_addon /etc/kubernetes/addons/namespace.yaml 100 10 "" &
+
+# Wait for the default service account to be created in the kube-system namespace.
+token_found=""
+while [ -z "${token_found}" ]; do
+  sleep .5
+  token_found=$(${KUBECTL} ${KUBECTL_OPTS} get --namespace="${SYSTEM_NAMESPACE}" serviceaccount default -o go-template="{{with index .secrets 0}}{{.name}}{{end}}")
+  if [[ $? -ne 0 ]]; then
+    token_found="";
+    log WRN "== Error getting default service account, retry in 0.5 second =="
+  fi
+done
+
+log INFO "== Default service account in the ${SYSTEM_NAMESPACE} namespace has token ${token_found} =="
+
 # Create admission_control objects if defined before any other addon services. If the limits
 # are defined in a namespace other than default, we should still create the limits for the
 # default namespace.
@@ -184,38 +238,18 @@
   log INFO "++ obj ${obj} is created ++"
 done
 
-# TODO: The annotate and spin up parts should be removed after 1.6 is released.
-
-# Fake the "kubectl.kubernetes.io/last-applied-configuration" annotation on old resources
-# in order to clean them up by `kubectl apply --prune`.
-# RCs have to be annotated for 1.4->1.5+ upgrade, because we migrated from RCs to deployments for all default addons in 1.5.
-# Other types resources will also need this fake annotation if their names are changed,
-# otherwise they would be leaked during upgrade.
-log INFO "== Annotating the old addon resources at $(date -Is) =="
-annotate_addons ReplicationController
-annotate_addons Deployment
-
-# Create new addon resources by apply (with --prune=false).
-# The old RCs will not fight for pods created by new Deployments with the same label because the `controllerRef` feature.
-# The new Deployments will not fight for pods created by old RCs with the same label because the additional `pod-template-hash` label.
-# Apply will fail if some fields are modified but not are allowed, in that case should bump up addon version and name (e.g. handle externally).
-log INFO "== Executing apply to spin up new addon resources at $(date -Is) =="
-ensure_addons
-reconcile_addons false
-
-# Wait for new addons to be spinned up before delete old resources
-log INFO "== Wait for addons to be spinned up at $(date -Is) =="
-sleep ${ADDON_CHECK_INTERVAL_SEC}
-
 # Start the apply loop.
 # Check if the configuration has changed recently - in case the user
 # created/updated/deleted the files on the master.
 log INFO "== Entering periodical apply loop at $(date -Is) =="
 while true; do
   start_sec=$(date +"%s")
-  # Only print stderr for the readability of logging
-  ensure_addons
-  reconcile_addons true
+  if is_leader; then
+    ensure_addons
+    reconcile_addons
+  else
+    log INFO "Not elected leader, going back to sleep."
+  fi
   end_sec=$(date +"%s")
   len_sec=$((${end_sec}-${start_sec}))
   # subtract the time passed from the sleep time
diff --git a/kubernetes/files/kube-addon-manager/namespace.yaml b/kubernetes/files/kube-addon-manager/namespace.yaml
new file mode 100644
index 0000000..4f03434
--- /dev/null
+++ b/kubernetes/files/kube-addon-manager/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: kube-system
\ No newline at end of file
diff --git a/kubernetes/master/setup.sls b/kubernetes/master/setup.sls
index cac63d5..54a98c2 100644
--- a/kubernetes/master/setup.sls
+++ b/kubernetes/master/setup.sls
@@ -41,6 +41,14 @@
     - mode: 644
     - makedirs: True
 
+/etc/kubernetes/addons/namespace.yaml:
+  file.managed:
+    - source: salt://kubernetes/files/kube-addon-manager/namespace.yaml
+    - user: root
+    - group: root
+    - mode: 644
+    - makedirs: True
+
 kube-addon-manager_service:
   service.running:
   - name: kube-addon-manager