blob: 3f838b6ef780e9b0696115598193ad0b3f9f175b [file] [log] [blame]
#!/bin/bash
# Trace each command as it runs and stop at the first unhandled failure.
set -x
set -e
# Auto-export every variable assigned from here on, so that child processes
# such as prepare-metadata.py can read them from the environment.
set -a
# Ensure we don't re-source this file in the same environment: the marker is
# declared read-only below and `return` leaves the file early once it is set.
[[ -z "$_FUNCTIONS_SCRIPT" ]] || return 0
declare -r -g _FUNCTIONS_SCRIPT=1
# The *_NETMASK values below are the part after "/" of the matching CIDR.
PUBLIC_INTERFACE_NETMASK=$(echo ${PUBLIC_INTERFACE_CIDR} | cut -d'/' -f2)
STORAGE_BACKEND_NETWORK_NETMASK=$(echo ${STORAGE_BACKEND_NETWORK} | cut -d'/' -f2)
STORAGE_FRONTEND_NETWORK_NETMASK=$(echo ${STORAGE_FRONTEND_NETWORK} | cut -d'/' -f2)
# Keep an already-set value, otherwise fall back to the lowercase variable.
IRONIC_BAREMETAL_INTERFACE_IP=${IRONIC_BAREMETAL_INTERFACE_IP:-$ironic_baremetal_interface_ip}
IRONIC_BAREMETAL_NETWORK_NETMASK=$(echo ${IRONIC_BAREMETAL_NETWORK} | cut -d'/' -f2)
# Third field of the `ip route` line that matches the baremetal network and has a " src " entry.
IRONIC_BAREMETAL_INTERFACE=$(ip route |grep ${IRONIC_BAREMETAL_NETWORK} | awk '/ src / {print $3}')
# Prefix length, then interface name, of the address equal to TUNNEL_INTERFACE_IP.
TUNNEL_INTERFACE_NETWORK_NETMASK=$(ip -o addr show |grep -w ${TUNNEL_INTERFACE_IP} | awk '{print $4}' |awk -F '/' '{print $2}')
TUNNEL_INTERFACE=$(ip -o addr show |grep -w ${TUNNEL_INTERFACE_IP}/${TUNNEL_INTERFACE_NETWORK_NETMASK} | awk '{print $2}')
# Strip the trailing "<digits>/<digits>" from the CIDR, leaving the leading part of the address.
IRONIC_BAREMETAL_NETWORK_PREFIX=$(sed 's/[0-9]*\/[0-9]*$//' <<< $IRONIC_BAREMETAL_NETWORK)
IRONIC_BAREMETAL_TUNNEL_NETWORK_PREFIX=$(sed 's/[0-9]*\/[0-9]*$//' <<< $IRONIC_BAREMETAL_TUNNEL_NETWORK)
# NOTE(review): repeats the STORAGE_FRONTEND_NETWORK_NETMASK assignment made a few lines above.
STORAGE_FRONTEND_NETWORK_NETMASK=$(echo ${STORAGE_FRONTEND_NETWORK} | cut -d'/' -f2)
DOCKER_DEFAULT_ADDRESS_POOL=${DOCKER_DEFAULT_ADDRESS_POOL:-10.10.1.0/16}
# DOCKER_DEFAULT_ADDRESS_SIZE has to be less than the netmask in DOCKER_DEFAULT_ADDRESS_POOL
# because the actual netmask for docker_gwbridge is taken from it.
DOCKER_DEFAULT_ADDRESS_SIZE=${DOCKER_DEFAULT_ADDRESS_SIZE:-24}
DOCKER_EE_RELEASE=${DOCKER_EE_RELEASE:-stable-19.03}
DOCKER_EE_PACKAGES=${DOCKER_EE_PACKAGES:-'docker-ee'}
BINARY_BASE_URL=${BINARY_BASE_URL:-"http://binary.mirantis.com"}
# First value of a "*_CODENAME=" entry found in /etc/*release.
CODENAME=$(cat /etc/*release | awk -F '=' '/_CODENAME/ {print $2}' | head -1)
### COMMON FUNCTIONS ###
get_interface_prefix() {
  # Print "<ipv4>/<prefix-length>" for the given interface, ignoring any
  # address that matches the "127.0.|172.17" filter.
  # Arguments: $1 - interface name
  local dev=$1
  local addr
  local prefix_len
  addr=$(ip addr show dev "${dev}" | grep -Po 'inet \K[\d.]+' | grep -Ev "127.0.|172.17")
  prefix_len=$(ip addr show dev "${dev}" | grep -Po 'inet \K[\d.]+\/[\d]+' | grep -Ev "127.0.|172.17" | cut -d'/' -f2)
  printf '%s\n' "${addr}/${prefix_len}"
}
### END COMMON FUNCTIONS ###
# Interface of the default route (fifth field of the "default" line), unless preset.
DEFAULT_INTERFACE=${DEFAULT_INTERFACE:-$(ip route show |awk '/default/ {print $5}')}
# Control address: taken from the route for CONTROL_NETWORK_CIDR when that is
# given, otherwise both address and CIDR are derived from DEFAULT_INTERFACE.
if [[ -n ${CONTROL_NETWORK_CIDR} ]]; then
CONTROL_IP_ADDRESS=$(ip route |grep ${CONTROL_NETWORK_CIDR} | head -n1 | fgrep -v ' via ' | awk '/ src / {print $9}')
else
CONTROL_IP_ADDRESS=$(get_interface_prefix ${DEFAULT_INTERFACE} | awk -F '/' '{print $1}')
CONTROL_NETWORK_CIDR=$(ip route show dev ${DEFAULT_INTERFACE} | awk '/kernel/ {print $1}')
fi
PUBLIC_INTERFACE=${PUBLIC_INTERFACE:-ens4}
UCP_USERNAME=${UCP_USERNAME:-admin}
UCP_PASSWORD=${UCP_PASSWORD:-administrator}
OS_CODENAME=$(lsb_release -c -s)
NODE_DEPLOYMENT_RETRIES=${NODE_DEPLOYMENT_RETRIES:-15}
FLOATING_NETWORK_PREFIXES=${FLOATING_NETWORK_PREFIXES:-10.11.12.0/24}
# NOTE(review): same default as the PUBLIC_INTERFACE assignment a few lines above.
PUBLIC_INTERFACE=${PUBLIC_INTERFACE:-ens4}
UCP_MASTER_HOST=${UCP_MASTER_HOST:-${CONTROL_IP_ADDRESS}}
UCP_IP_ADDRESS=${UCP_IP_ADDRESS:-$CONTROL_IP_ADDRESS}
# Change default VXLAN port, need for https://mirantis.jira.com/browse/PRODX-11679
UCP_DOCKER_SWARM_DATA_PORT=${UCP_DOCKER_SWARM_DATA_PORT:-4789}
UCP_DOCKER_CALICO_VXLAN_PORT=${UCP_DOCKER_CALICO_VXLAN_PORT:-${UCP_DOCKER_SWARM_DATA_PORT}}
NTP_SERVERS=${NTP_SERVERS:-"ldap.scc.mirantis.net ldap.bud.mirantis.net"}
IRONIC_BAREMETAL_VXLAN_INTERFACE='vxlan10'
# HUGE_PAGES is read as "<1G value>,<2Mb value>": first and second comma-separated fields.
HUGE_PAGES_1G_SIZE=$(echo "${HUGE_PAGES}" | awk -F ',' '{print $1}')
HUGE_PAGES_2Mb_SIZE=$(echo "${HUGE_PAGES}" | awk -F ',' '{print $2}')
CONFIGURE_HUGE_PAGES=false
TUNNEL_INTERFACE_NETPLAN_MANAGE=true
TUNGSTENFABRIC_ENABLED=${TUNGSTENFABRIC_ENABLED:-false}
# Any positive huge-page value enables huge-page configuration; in that case,
# unless TungstenFabric is enabled, the tunnel interface is not managed by netplan.
if [[ "${HUGE_PAGES_1G_SIZE}" -gt 0 ]] || [[ "${HUGE_PAGES_2Mb_SIZE}" -gt 0 ]]; then
CONFIGURE_HUGE_PAGES=true
if [[ "${TUNGSTENFABRIC_ENABLED,,}" == false ]]; then
TUNNEL_INTERFACE_NETPLAN_MANAGE=false
fi
fi
SINGLE_NODE=${SINGLE_NODE:-true}
# FRR options; router id and BGP source address both use the tunnel interface IP.
FRR_BGP_AS_NUMBER=${FRR_BGP_AS_NUMBER:-64512}
FRR_ROUTER_ID=${TUNNEL_INTERFACE_IP}
FRR_BGP_SOURCE_ADDRESS=${TUNNEL_INTERFACE_IP}
FRR_BGP_NEIGHBORS=${FRR_BGP_NEIGHBORS:-}
# Passed unquoted to `seq` in setup_evpn, i.e. "<first> <last>".
FRR_EVPN_TUNNELS_RANGE=${FRR_EVPN_TUNNELS_RANGE:-'20 50'}
FRR_EVPN_VXLAN_DST_PORT=${FRR_EVPN_VXLAN_DST_PORT:-4790}
# Loop-device sizes (used with a "G" suffix by configure_lvm); 0 disables the setup.
LVM_LOOP_DEVICE_SIZE=${LVM_LOOP_DEVICE_SIZE:-0}
CINDER_LVM_LOOP_DEVICE_SIZE=${CINDER_LVM_LOOP_DEVICE_SIZE:-0}
# kubectl invocation prefix; expanded unquoted by callers, so it is split into words.
_KUBECTL_CMD="kubectl --kubeconfig /etc/kubernetes/admin.conf "
# Run a command until it succeeds, with exponential back-off between attempts.
# Arguments:
#   $1   - maximum number of attempts
#   $2   - message printed once all attempts are used up
#   $3.. - command (and its arguments) to run
# Outputs: progress messages on stdout.
# Returns: 0 as soon as the command succeeds, otherwise the exit status of the
#          last failed attempt.
function retry {
  local retries=$1
  shift
  local msg="$1"
  shift
  local count=0
  # Keep the bookkeeping local: this file runs under `set -a`, so globals named
  # "exit"/"wait" would otherwise leak into the environment of every child.
  local rc
  local delay
  until "$@"; do
    rc=$?
    # The delay doubles after every failed attempt: 1, 2, 4, ... seconds.
    delay=$((2 ** count))
    count=$((count + 1))
    if [ "${count}" -lt "${retries}" ]; then
      echo "Retry $count/$retries exited $rc, retrying in $delay seconds..."
      sleep "${delay}"
    else
      echo "Retry $count/$retries exited $rc, no more retries left."
      echo "$msg"
      return "${rc}"
    fi
  done
  return 0
}
# Install the given apt packages non-interactively, retrying up to 10 times.
# Arguments: $@ - package names
# Returns: status of `retry` (non-zero when every attempt failed).
function install_pkgs {
  # Keep the package list as an array so each name stays a separate word.
  local -a pkgs=("$@")
  function _retry_install {
    apt update && DEBIAN_FRONTEND=noninteractive apt install -o DPkg::Options::=--force-confdef -y "${pkgs[@]}"
  }
  # The failure message used to be the copy-pasted "Labeling node failed".
  retry 10 "Failed to install packages: ${pkgs[*]}" _retry_install
}
setup_bind_mounts() {
  # For every component whose /var/lib/<component> directory does not exist
  # yet, create it together with /var/lib/data/<component>, register a bind
  # mount between the two in /etc/fstab, and finally mount everything.
  # With the k0s installer, /var/lib/kubelet is bound to /var/lib/k0s/kubelet.
  local fstab=/etc/fstab
  mkdir -p /var/lib/persistant-data
  for component in nova glance cinder docker; do
    if [[ ! -d /var/lib/${component} ]]; then
      mkdir -p "/var/lib/data/${component}" "/var/lib/${component}"
      printf '%s\n' "/var/lib/data/${component} /var/lib/${component} none bind 0 0" >> "${fstab}"
    fi
  done
  if [[ "${KUBERNETES_INSTALLER}" == "k0s" ]]; then
    mkdir -p /var/lib/k0s/kubelet
    mkdir -p /var/lib/kubelet
    printf '%s\n' "/var/lib/k0s/kubelet /var/lib/kubelet none bind 0 0" >> "${fstab}"
  fi
  mount -a
}
configure_atop() {
  # Switch the atop INTERVAL setting from 600 to 60 and restart the service.
  local daily_script=/usr/share/atop/atop.daily
  sed -i 's/INTERVAL=600/INTERVAL=60/' "${daily_script}"
  systemctl restart atop
}
disable_unattended_upgr() {
  # Disable unattended upgrades: replace the first "1" on each line of
  # 20auto-upgrades with "0", then switch the periodic package-list update off
  # in every file of the apt configuration directory.
  local apt_conf_dir=/etc/apt/apt.conf.d
  sed -i 's/1/0/' "${apt_conf_dir}/20auto-upgrades"
  sed -i 's/APT::Periodic::Update-Package-Lists "1"/APT::Periodic::Update-Package-Lists "0"/g' "${apt_conf_dir}"/*
}
configure_logind_conf() {
  # Replace /etc/systemd/logind.conf with a file whose only active setting is
  # InhibitDelayMaxSec=600 (everything else stays commented out), then reload
  # the systemd manager configuration.
  local logind_conf=/etc/systemd/logind.conf
  cat > "${logind_conf}" <<'EOF'
[Login]
#NAutoVTs=6
#ReserveVT=6
#KillUserProcesses=no
#KillOnlyUsers=
#KillExcludeUsers=root
InhibitDelayMaxSec=600
#UserStopDelaySec=10
#HandlePowerKey=poweroff
#HandleSuspendKey=suspend
#HandleHibernateKey=hibernate
#HandleLidSwitch=suspend
#HandleLidSwitchExternalPower=suspend
#HandleLidSwitchDocked=ignore
#HandleRebootKey=reboot
#PowerKeyIgnoreInhibited=no
#SuspendKeyIgnoreInhibited=no
#HibernateKeyIgnoreInhibited=no
#LidSwitchIgnoreInhibited=yes
#RebootKeyIgnoreInhibited=no
#HoldoffTimeoutSec=30s
#IdleAction=ignore
#IdleActionSec=30min
#RuntimeDirectorySize=10%
#RuntimeDirectoryInodesMax=400k
#RemoveIPC=yes
#InhibitorsMax=8192
#SessionsMax=8192
EOF
  systemctl daemon-reload
}
install_required_packages() {
  # Install the base package set, plus the DPDK kernel-module package when
  # huge pages are configured, and fetch yq v3.3.2 if /usr/bin/yq is missing.
  local pkg_list="apt-transport-https ca-certificates curl software-properties-common jq unzip atop iptables-persistent socat ntp rng-tools open-iscsi nfs-common"
  if [[ "${CONFIGURE_HUGE_PAGES}" == true ]]; then
    # Since jammy the igb_uio kernel module lives in the dpdk-kmods-dkms package.
    case "${CODENAME}" in
      bionic|focal)
        pkg_list="$pkg_list dpdk-igb-uio-dkms"
        ;;
      *)
        pkg_list="$pkg_list dpdk-kmods-dkms"
        ;;
    esac
    # linux-modules-extra-$(uname -r) provides the uio_pci_generic module,
    # used by TF DPDK as default driver.
    if [[ "${TUNGSTENFABRIC_ENABLED,,}" == true ]]; then
      pkg_list="$pkg_list linux-modules-extra-$(uname -r)"
    fi
  fi
  # Unquoted on purpose: the list must split into one argument per package.
  install_pkgs $pkg_list
  # yq is pinned to v3 because version 4 uses another syntax.
  if [[ ! -f /usr/bin/yq ]]; then
    curl --retry 6 --retry-delay 5 -L ${BINARY_BASE_URL}/openstack/bin/utils/yq/yq-v3.3.2 -o /usr/bin/yq
    chmod +x /usr/bin/yq
  fi
}
enable_iscsi() {
  # Enable the iscsid unit at boot and (re)start it now.
  local action
  for action in enable restart; do
    systemctl "${action}" iscsid
  done
}
configure_lvm() {
  # Create loop-device backed LVM volume groups: "nova-vol" on loop10 when
  # LVM_LOOP_DEVICE_SIZE is positive and "cinder-vol" on loop11 when
  # CINDER_LVM_LOOP_DEVICE_SIZE is positive.

  # _setup_lvm <loop number> <size in G> <volume group name>
  _setup_lvm() {
    local loop_idx=$1
    local size_gb=$2
    local vg=$3
    local backing_file=/srv/disk${loop_idx}
    local loop_dev=/dev/loop${loop_idx}
    local unit=setup-loopback-loop${loop_idx}
    if vgdisplay | grep "${vg}"; then
      echo "Volume group ${vg} is already initialize"
      return 0
    fi
    # LVM is configured only on compute nodes.
    if [[ ${NODE_METADATA} != *"openstack-compute-node"* ]]; then
      return 0
    fi
    truncate --size ${size_gb}G "${backing_file}"
    # Unit that attaches the backing file to the loop device.
    cat > /etc/systemd/system/${unit}.service <<EOF
[Unit]
Description=Setup loop${loop_idx} device
DefaultDependencies=no
After=systemd-udev-settle.service
Before=lvm2-activation-early.service
Wants=systemd-udev-settle.service
[Service]
ExecStart=-/sbin/losetup ${loop_dev} ${backing_file}
RemainAfterExit=true
Type=oneshot
[Install]
WantedBy=local-fs.target
EOF
    systemctl enable "${unit}"
    systemctl start "${unit}"
    # Short sleep to give the service time to start.
    sleep 3
    pvcreate "${loop_dev}"
    vgcreate "${vg}" "${loop_dev}"
  }

  if [[ "${LVM_LOOP_DEVICE_SIZE}" -gt 0 ]]; then
    _setup_lvm 10 ${LVM_LOOP_DEVICE_SIZE} nova-vol
  fi
  if [[ "${CINDER_LVM_LOOP_DEVICE_SIZE}" -gt 0 ]]; then
    _setup_lvm 11 ${CINDER_LVM_LOOP_DEVICE_SIZE} cinder-vol
  fi
}
install_docker() {
  # Add the Docker EE apt repository and key, install DOCKER_EE_PACKAGES and,
  # when the container runtime is "crio", unpack nerdctl into /usr/local/bin.
  # The whole sequence is retried up to 10 times.
  install_retry() {
    local tarball
    curl --retry 6 --retry-delay 5 -fsSL "${DOCKER_EE_URL}/gpg" | sudo apt-key add -
    add-apt-repository "deb [arch=amd64] ${DOCKER_EE_URL}/ ${OS_CODENAME} ${DOCKER_EE_RELEASE}"
    install_pkgs ${DOCKER_EE_PACKAGES}
    if [[ $KUBERNETES_CONTAINER_RUNTIME != "crio" ]]; then
      return 0
    fi
    NERDCTL_VERSION=1.7.6
    tarball=nerdctl-${NERDCTL_VERSION}-linux-amd64.tar.gz
    curl --retry 6 --retry-delay 5 -fsSL https://github.com/containerd/nerdctl/releases/download/v${NERDCTL_VERSION}/${tarball} --output ${tarball}
    tar zxvf ${tarball} -C /usr/local/bin
  }
  retry 10 "Failed to install docker" install_retry
}
prepare_docker_config() {
  # Write /etc/docker/daemon.json (cgroup namespace mode, default address pool
  # and json-file log rotation). When the container runtime is "crio", also
  # write a containerd config that enables the systemd cgroup driver for runc
  # and restart containerd on a best-effort basis.
  local docker_dir=/etc/docker
  mkdir -p "${docker_dir}"
  cat > "${docker_dir}/daemon.json" <<EOF
{
"default-cgroupns-mode": "host",
"default-address-pools": [
{ "base": "${DOCKER_DEFAULT_ADDRESS_POOL}", "size": ${DOCKER_DEFAULT_ADDRESS_SIZE} }
],
"log-driver": "json-file",
"log-opts": {
"max-size": "1g",
"max-file": "3"
}
}
EOF
  if [[ $KUBERNETES_CONTAINER_RUNTIME != "crio" ]]; then
    return 0
  fi
  mkdir -p /etc/containerd/
  cat > /etc/containerd/config.toml <<EOF
version = 2
[plugins]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes]
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc]
runtime_type = "io.containerd.runc.v2"
[plugins."io.containerd.grpc.v1.cri".containerd.runtimes.runc.options]
SystemdCgroup = true
EOF
  # A failed restart is tolerated.
  systemctl restart containerd || /bin/true
}
# Pull an image with docker (and additionally with nerdctl into the k8s.io
# containerd namespace when the container runtime is "crio"), retrying up to
# 10 times.
# Arguments: $1 - image reference
# Returns: status of `retry`.
function pull_image {
  local image="$1"
  function pull_image_retry {
    if [[ $KUBERNETES_CONTAINER_RUNTIME == "crio" ]]; then
      # Propagate a failed containerd pull so that `retry` sees it; errexit is
      # not active inside the `until` condition that runs this function, so the
      # failure would otherwise be masked by the docker pull below.
      nerdctl --address unix:///run/containerd/containerd.sock -n k8s.io pull "${image}" || return
    fi
    docker pull "${image}"
  }
  retry 10 "Can't pull docker image $image" pull_image_retry
}
cache_images() {
  # Pull the UCP image (with retries), then every image reported by its
  # `images --list` command from the same registry prefix, tagging each one
  # under "mirantis/". With the "crio" runtime the tag is also applied through
  # nerdctl in the k8s.io namespace.
  local image_prefix=${DOCKER_UCP_IMAGE%/*}
  local short_name
  local source_ref
  pull_image "${DOCKER_UCP_IMAGE}"
  for image in $(docker container run --rm --name ucp -v /var/run/docker.sock:/var/run/docker.sock ${DOCKER_UCP_IMAGE} images --list); do
    short_name=${image##*/}
    source_ref=${image_prefix}/${short_name}
    pull_image "${source_ref}"
    if [[ $KUBERNETES_CONTAINER_RUNTIME == "crio" ]]; then
      nerdctl --address unix:///run/containerd/containerd.sock -n k8s.io tag ${source_ref} mirantis/${short_name}
    fi
    docker tag ${source_ref} mirantis/${short_name}
  done
}
# Install Docker UCP through the UCP image's `install` command unless a
# "ucp-controller" container already shows up in `docker ps --all`.
# Reads: DOCKER_UCP_IMAGE, UCP_IP_ADDRESS, UCP_USERNAME, UCP_PASSWORD.
# Returns: status of `retry` (the installation is attempted up to 10 times).
function install_ucp {
  if docker ps --all | grep ucp-controller; then
    echo "Docker UCP container is running"
  else
    # The temporary directory that used to be created here was never used and
    # was left behind on every call, so it is gone.
    function docker_run_retry {
      # --existing-config: presumably picks up the com.docker.ucp.config
      # object written by create_ucp_config - confirm against UCP docs.
      docker container run --rm --name ucp \
        -v /var/run/docker.sock:/var/run/docker.sock \
        "${DOCKER_UCP_IMAGE}" install \
        --host-address "${UCP_IP_ADDRESS}" \
        --admin-username "${UCP_USERNAME}" \
        --admin-password "${UCP_PASSWORD}" \
        --existing-config
    }
    retry 10 "Can't bring up docker UCP container" docker_run_retry
  fi
}
download_k0s() {
  # Fetch the script served at get.k0s.sh and run it through `sudo sh`.
  local installer_url=https://get.k0s.sh
  curl -sSLf "${installer_url}" | sudo sh
}
# Set up a k0s controller on this node, export the admin kubeconfig and the
# join tokens into /etc/k0s, and serve that directory over HTTP with nginx at
# /k0s (download_k8s_metadata fetches the same three files from that path).
function install_k0s {
  mkdir -p /etc/k0s
  k0s config create > /etc/k0s/k0s.yaml
  k0s install controller -c /etc/k0s/k0s.yaml
  k0s start
  # Fixed pause before the kubeconfig and tokens are requested.
  sleep 60
  mkdir -p /etc/kubernetes /root/.kube/
  k0s kubeconfig admin > /etc/kubernetes/admin.conf
  cp /etc/kubernetes/admin.conf /root/.kube/config
  k0s kubeconfig admin > /etc/k0s/admin.conf
  k0s token create --role=worker > /etc/k0s/worker_token.yaml
  k0s token create --role=controller > /etc/k0s/controller_token.yaml
  install_pkgs nginx
  rm -f /etc/nginx/sites-enabled/default
  # -f/-n keep a re-run from creating a nested link inside /etc/k0s when
  # /var/www/k0s already exists as a symlink.
  ln -sfn /etc/k0s/ /var/www/k0s
  cat << EOF > /etc/nginx/sites-enabled/k0s
server {
listen *:80;
root /var/www;
location /k0s {
autoindex on;
}
}
EOF
  systemctl restart nginx
}
# Log in to UCP and store the session token in the global AUTHTOKEN.
# See https://docs.docker.com/ee/ucp/user-access/cli/
# Reads: UCP_USERNAME, UCP_PASSWORD, UCP_MASTER_HOST.
# Returns: 0 when a token was obtained, 1 otherwise (so `retry` tries again).
function get_authtoken_retry {
  # NOTE(review): the JSON body is assembled by plain string interpolation, so
  # credentials containing '"' or '\' would still produce an invalid payload.
  AUTHTOKEN=$(curl --retry 6 --retry-delay 5 -sk -d "{\"username\":\"${UCP_USERNAME}\",\"password\":\"${UCP_PASSWORD}\"}" "https://${UCP_MASTER_HOST}/auth/login" | jq -r .auth_token)
  # `jq -r` prints the literal string "null" when the reply has no auth_token
  # field, so that must count as a failure as well.
  if [[ -z "${AUTHTOKEN}" || "${AUTHTOKEN}" == "null" ]]; then
    return 1
  fi
}
download_bundles() {
  # Obtain a UCP auth token, download the client certificate bundle into a
  # fresh temporary directory, evaluate its env.sh in the current shell and
  # install kube.yml as the admin kubeconfig.
  local bundle_dir
  local kubeconfig_target
  bundle_dir=$(mktemp -d)
  download_bundles_retry() {
    # Download the client certificate bundle.
    curl --retry 6 --retry-delay 5 -k -H "Authorization: Bearer $AUTHTOKEN" https://${UCP_MASTER_HOST}/api/clientbundle -o ${bundle_dir}/bundle.zip
  }
  retry 10 "Can't get AUTHTOKEN from master." get_authtoken_retry
  retry 10 "Can't download bundle file from master." download_bundles_retry
  pushd "${bundle_dir}"
  unzip bundle.zip
  # Run the utility script shipped in the bundle, from inside the bundle
  # directory.
  eval "$(<env.sh)"
  mkdir -p /etc/kubernetes /root/.kube/
  for kubeconfig_target in /etc/kubernetes/admin.conf /root/.kube/config; do
    cp kube.yml "${kubeconfig_target}"
  done
  popd
}
wait_for_node() {
  # Poll `kubectl get nodes` until this host is listed with status Ready,
  # giving up after NODE_DEPLOYMENT_RETRIES attempts.
  retry_wait() {
    ${_KUBECTL_CMD} get nodes \
      | grep -w Ready \
      | awk '{print $1}' \
      | grep -q "$(hostname)"
  }
  retry "${NODE_DEPLOYMENT_RETRIES}" "The node didn't come up." retry_wait
}
download_k8s_metadata() {
  # Download the k0s join tokens and admin kubeconfig from the /k0s location
  # on UCP_MASTER_HOST into /etc/k0s, then install the kubeconfig for kubectl.
  local k0s_dir=/etc/k0s
  local admin_conf=${k0s_dir}/admin.conf
  mkdir -p "${k0s_dir}/"
  mkdir -p /etc/kubernetes
  for f in worker_token.yaml controller_token.yaml admin.conf; do
    curl --retry 6 --retry-delay 5 -L "http://${UCP_MASTER_HOST}/k0s/${f}" -o "${k0s_dir}/${f}"
  done
  cp "${admin_conf}" /etc/kubernetes/admin.conf
  mkdir -p /root/.kube/
  cp "${admin_conf}" /root/.kube/config
}
join_node() {
  # Join this node to the docker swarm unless kubectl already reports it as
  # Ready.
  # Arguments: $1 - token type handed to `docker swarm join-token`
  local type
  if kubectl --kubeconfig /etc/kubernetes/admin.conf get nodes | grep -w Ready | awk '{print $1}' | grep -q "$(hostname)"; then
    echo "This node is already join"
    return 0
  fi
  type=${1}
  retry_join_node() {
    # Run the join command printed by `docker swarm join-token` with an empty
    # environment, then restart docker.
    env -i $(docker swarm join-token $type | grep 'docker swarm join' | xargs)
    sleep 20 && systemctl restart docker && sleep 20
  }
  retry 10 "Failed to join node to swarm" retry_join_node
}
join_k0s_node() {
  # Install the k0s worker with the downloaded worker token and start it.
  local worker_token=/etc/k0s/worker_token.yaml
  k0s install worker --token-file "${worker_token}"
  k0s start
}
# Create the "com.docker.ucp.config" docker config object unless one already
# exists.
# Reads: SINGLE_NODE, SECURE_OVERLAY_ENABLED, KUBERNETES_CONTAINER_RUNTIME,
#        UCP_DOCKER_CALICO_VXLAN_PORT.
function create_ucp_config {
  # Optional fragments start out empty and stay local, so a stale value
  # inherited from the environment can no longer end up in the generated
  # config (and nothing is exported from here under `set -a`).
  local max_pods=""
  local secure_overlay=""
  local kubelet_custom_flags=""
  if [[ "${SINGLE_NODE}" == true ]]; then
    max_pods="kubelet_max_pods = 220"
  fi
  if [[ "${SECURE_OVERLAY_ENABLED,,}" == true ]]; then
    secure_overlay="secure_overlay = true
calico_vxlan = false"
  fi
  if [[ $KUBERNETES_CONTAINER_RUNTIME == "crio" ]]; then
    kubelet_custom_flags='custom_kubelet_flags=["--cgroup-driver=systemd", "--container-runtime-endpoint=unix:///run/containerd/containerd.sock"]'
  fi
  if docker config ls | grep com.docker.ucp.config ; then
    echo "Config com.docker.ucp.config already exists"
  else
    echo "
[scheduling_configuration]
enable_admin_ucp_scheduling = true
default_node_orchestrator = \"kubernetes\"
[cluster_config]
dns = [\"172.18.208.44\"]
calico_vxlan_port = \"$UCP_DOCKER_CALICO_VXLAN_PORT\"
${max_pods}
${secure_overlay}
${kubelet_custom_flags}
" | docker config create com.docker.ucp.config -
  fi
}
swarm_init() {
  # Initialise a docker swarm on UCP_IP_ADDRESS unless this host already shows
  # up in `docker node ls`; docker is restarted after the initialisation.
  if docker node ls | grep $HOSTNAME; then
    echo "This node is already part of a swarm"
    return 0
  fi
  docker swarm init \
    --advertise-addr ${UCP_IP_ADDRESS} \
    --data-path-addr ${UCP_IP_ADDRESS} \
    --listen-addr ${UCP_IP_ADDRESS} \
    --data-path-port ${UCP_DOCKER_SWARM_DATA_PORT}
  sleep 5 && systemctl restart docker && sleep 5
}
rm_ucp_config() {
  # Remove the docker config object that create_ucp_config creates.
  local config_name=com.docker.ucp.config
  docker config rm "${config_name}"
}
install_kubectl() {
  # Download kubectl KUBECTL_VERSION from BINARY_BASE_URL into /usr/local/bin,
  # make it executable and append bash-completion setup lines to ~/.bashrc.
  local target=/usr/local/bin/kubectl
  local source_url=${BINARY_BASE_URL}/openstack/bin/utils/kubectl/kubectl-${KUBECTL_VERSION}-linux
  curl --retry 6 --retry-delay 5 -L "${source_url}" -o "${target}"
  chmod +x "${target}"
  printf '%s\n' \
    'source /usr/share/bash-completion/bash_completion' \
    'source <(kubectl completion bash)' >> ~/.bashrc
}
configure_ntp() {
  # Rewrite /etc/ntp.conf with one "server ... iburst" line per entry of
  # NTP_SERVERS followed by the access and listen settings, then switch from
  # systemd-timesyncd to ntp.
  local ntp_conf=/etc/ntp.conf
  # Start from a file that holds a single empty line.
  echo "" > "${ntp_conf}"
  for server in $NTP_SERVERS; do
    printf 'server %s iburst\n' "${server}" >> "${ntp_conf}"
  done
  cat >> "${ntp_conf}" << EOF
# Set general access to this service
restrict -4 default kod nomodify notrap nopeer noquery
restrict -6 default kod nomodify notrap nopeer noquery
# Allow access from localhost
restrict 127.0.0.1
restrict ::1
# Do not listen on any interface address by default
interface ignore wildcard
interface listen lo
interface listen ${DEFAULT_INTERFACE}
# Location of drift file
driftfile /var/lib/ntp/ntp.drift
EOF
  systemctl disable systemd-timesyncd
  systemctl enable ntp
  systemctl restart ntp
}
wait_for_external_network() {
  # Wait until the registry host (the part of DOCKER_UCP_IMAGE before the first
  # "/") answers a curl request; up to 180 attempts are made.
  _check_access() {
    local registry_host=${DOCKER_UCP_IMAGE%%/*}
    if curl --connect-timeout 10 ${registry_host}; then
      return 0
    fi
    # Pause briefly before reporting the failure to the retry loop.
    sleep 1
    return 1
  }
  retry 180 "Failed to wait for external networks reachable." _check_access
}
prepare_network() {
  # Abort the deployment when no control IP address was detected; otherwise
  # restart systemd-resolved and add this host's names to /etc/hosts right
  # after the "127.0.0.1 localhost" entry.
  local short_host
  if [[ -z "${CONTROL_IP_ADDRESS}" ]]; then
    wait_condition_send "FAILURE" "CONTROL_IP_ADDRESS is not found."
    exit 1
  fi
  systemctl restart systemd-resolved
  # A failing hostname lookup must not abort here; it just yields an empty name.
  short_host=$(hostname -s) || true
  # Make sure local hostname is present in /etc/hosts
  sed -i "s/127.0.0.1 localhost/127.0.0.1 localhost\n${CONTROL_IP_ADDRESS} ${short_host}.cluster.local ${short_host}/" /etc/hosts
}
workaround_default_forward_policy() {
  # Unless every floating network prefix already appears in the DOCKER-USER
  # chain, regenerate /etc/iptables/rules.v4 with ACCEPT rules for those
  # prefixes plus MASQUERADE rules on the default interface, and reload it.
  local rules_file=/etc/iptables/rules.v4
  local should_run=0
  for net in $FLOATING_NETWORK_PREFIXES; do
    if ! iptables -nvL DOCKER-USER | grep $net; then
      should_run=1
      break
    fi
  done
  if [[ $should_run == 0 ]]; then
    echo "Iptables is already configured"
    return 0
  fi
  {
    printf '%s\n' '*filter' ':DOCKER-USER - [0:0]'
    for net in $FLOATING_NETWORK_PREFIXES; do
      printf '%s\n' "-A DOCKER-USER -d ${net} -j ACCEPT" "-A DOCKER-USER -s ${net} -j ACCEPT"
    done
    printf '%s\n' '-A DOCKER-USER -j RETURN' 'COMMIT'
    # Allow access to Internet from VMs for virtualized environment.
    printf '%s\n' '*nat' ':POSTROUTING ACCEPT - [0:0]'
    for net in $FLOATING_NETWORK_PREFIXES; do
      printf '%s\n' "-A POSTROUTING -s ${net} -o ${DEFAULT_INTERFACE} -j MASQUERADE"
    done
    printf '%s\n' 'COMMIT'
  } > "${rules_file}"
  sudo netfilter-persistent reload
}
disable_rp_filter() {
  # Persist rp_filter=0 for "all" and "default", zero it on the existing
  # enp*/ens*/eth* interfaces and load the new sysctl file.
  # Run this before "network_config" so that new interfaces are created with
  # the default rp_filter value.
  local sysctl_file=/etc/sysctl.d/99-disable-rp-filter.conf
  printf '%s\n' \
    'net.ipv4.conf.all.rp_filter=0' \
    'net.ipv4.conf.default.rp_filter=0' > "${sysctl_file}"
  for iface in $(find /proc/sys/net/ipv4/conf/ -name "enp*" -o -name "ens*" -o -name "eth*"); do
    echo 0 > $iface/rp_filter
  done
  sysctl -p "${sysctl_file}"
}
configure_sysctl_limits() {
  # Persist and apply the fs.inotify.max_user_instances setting.
  local sysctl_file=/etc/sysctl.d/99-limits.conf
  printf '%s\n' 'fs.inotify.max_user_instances = 8192' > "${sysctl_file}"
  sysctl -p "${sysctl_file}"
}
configure_contrack() {
  # Persist and apply the two nf_conntrack settings listed below.
  local sysctl_file=/etc/sysctl.d/100-contrackd.conf
  printf '%s\n' \
    'net.netfilter.nf_conntrack_log_invalid=255' \
    'net.netfilter.nf_conntrack_tcp_be_liberal=1' > "${sysctl_file}"
  sysctl -p "${sysctl_file}"
}
# NOTE(vsaienko): disable calling iptables when sending packets via bridge.
disable_iptables_for_bridges() {
  # Persist and apply bridge-nf-call-{ip6tables,iptables,arptables}=0.
  local sysctl_file=/etc/sysctl.d/101-bridge-nf-call-iptables.conf
  printf '%s\n' \
    'net.bridge.bridge-nf-call-ip6tables=0' \
    'net.bridge.bridge-nf-call-iptables=0' \
    'net.bridge.bridge-nf-call-arptables=0' > "${sysctl_file}"
  sysctl -p "${sysctl_file}"
}
# Rework the cloud-init netplan file so that the public interface (and, when a
# baremetal interface was detected, the Ironic interfaces) sit behind bridges
# connected through veth pairs, then apply the result with netplan.
# Arguments: $1 - optional public interface name (stored in a local only).
# Reads: PUBLIC_INTERFACE*, IRONIC_*, TUNNEL_INTERFACE*, NODE_METADATA.
# Writes: PUBLIC_NODE_IP_ADDRESS and PUBLIC_NODE_IP_NETMASK (globals), veth
#         .netdev files, the netplan file and, in the multitenancy case, the
#         ironic-vxlan-tunnel systemd unit.
function network_config {
# Preset values win; otherwise the address and prefix length are read from the
# interface, skipping anything that matches the "127.0.|172.17" filter.
PUBLIC_NODE_IP_ADDRESS=${PUBLIC_INTERFACE_IP:-$(ip addr show dev ${PUBLIC_INTERFACE} | grep -Po 'inet \K[\d.]+' | egrep -v "127.0.|172.17")}
PUBLIC_NODE_IP_NETMASK=${PUBLIC_INTERFACE_NETMASK:-$(ip addr show dev ${PUBLIC_INTERFACE} | grep -Po 'inet \K[\d.]+\/[\d]+' | egrep -v "127.0.|172.17" | cut -d'/' -f2)}
# NOTE(review): public_interface and match_ip_line are declared but not referenced again in this function.
local public_interface=${1:-${PUBLIC_INTERFACE}}
local cloud_netplan_cfg="/etc/netplan/50-cloud-init.yaml"
local match_ip_line
install_pkgs bridge-utils
# veth pair for the public bridge; veth-br is attached to br-public further down.
cat << EOF > /etc/systemd/network/10-veth-phy-br.netdev
[NetDev]
Name=veth-phy
Kind=veth
[Peer]
Name=veth-br
EOF
# Extra veth pairs for the baremetal bridge, only when a baremetal interface was found.
if [[ -n ${IRONIC_BAREMETAL_INTERFACE} ]]; then
cat << EOF > /etc/systemd/network/11-veth-bm.netdev
[NetDev]
Name=veth-bm
Kind=veth
[Peer]
Name=veth-bm-br
EOF
cat << EOF > /etc/systemd/network/12-veth-vbmc.netdev
[NetDev]
Name=veth-vbmc
Kind=veth
[Peer]
Name=veth-vbmc-br
EOF
fi
# Register the veth devices by inserting a line after each "ethernets:" match.
sed -i 's/.*ethernets:.*/&\n veth-phy: {}/' ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-br: {}/' ${cloud_netplan_cfg}
if [[ -n ${IRONIC_BAREMETAL_INTERFACE} ]]; then
sed -i 's/.*ethernets:.*/&\n veth-bm: {}/' ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-bm-br: {}/' ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-vbmc: {}/' ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-vbmc-br: {}/' ${cloud_netplan_cfg}
if [[ "${IRONIC_MT_ENABLED,,}" == true ]] ; then
# With IRONIC_MT_ENABLED: add the vxlan device and a PXE veth pair, rewrite
# listed addresses from the baremetal prefix to the tunnel prefix, and put the
# baremetal IP on VLAN 1000 over veth-pxe.
sed -i "s/.*ethernets:.*/&\n ${IRONIC_BAREMETAL_VXLAN_INTERFACE}: {}/" ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-pxe: {}/' ${cloud_netplan_cfg}
sed -i 's/.*ethernets:.*/&\n veth-pxe-br: {}/' ${cloud_netplan_cfg}
cat << EOF > /etc/systemd/network/13-veth-pxe.netdev
[NetDev]
Name=veth-pxe
Kind=veth
[Peer]
Name=veth-pxe-br
EOF
sed -i "s/- ${IRONIC_BAREMETAL_NETWORK_PREFIX}\([0-9]*\)/- ${IRONIC_BAREMETAL_TUNNEL_NETWORK_PREFIX}\1/" ${cloud_netplan_cfg}
cat << EOF >> ${cloud_netplan_cfg}
vlans:
pxe.1000:
id: 1000
link: veth-pxe
addresses:
- ${IRONIC_BAREMETAL_INTERFACE_IP}/${IRONIC_BAREMETAL_NETWORK_NETMASK}
EOF
else
# Otherwise delete the line holding the baremetal address together with the
# line before it; the address is added to br-baremetal further down.
# NOTE(review): unlike the public address below, there is no check that the grep matched.
ironic_baremetal_address_match_ip_line=$(grep -nm1 "${IRONIC_BAREMETAL_INTERFACE_IP}/${IRONIC_BAREMETAL_NETWORK_NETMASK}" ${cloud_netplan_cfg} | cut -d: -f1)
sed -i "$((${ironic_baremetal_address_match_ip_line}-1)),$((${ironic_baremetal_address_match_ip_line}))d" ${cloud_netplan_cfg}
fi
fi
# Delete the public address line (and the line before it) when present; the
# address is re-added on br-public below.
public_address_match_ip_line=$(grep -nm1 "${PUBLIC_NODE_IP_ADDRESS}/${PUBLIC_NODE_IP_NETMASK}" ${cloud_netplan_cfg} | cut -d: -f1)
if [ -n "${public_address_match_ip_line}" ] ; then
sed -i "$((${public_address_match_ip_line}-1)),$((${public_address_match_ip_line}))d" ${cloud_netplan_cfg}
fi
cat << EOF >> ${cloud_netplan_cfg}
bridges:
br-public:
dhcp4: false
interfaces:
- ${PUBLIC_INTERFACE}
- veth-br
addresses:
- ${PUBLIC_NODE_IP_ADDRESS}/${PUBLIC_NODE_IP_NETMASK}
EOF
# Assign more ips for neutron dynamic routing PRODX-31417
# (same leading three octets as the public address, last octet 71..76).
if [[ ${NODE_METADATA} == *"tempest"* ]]; then
for i in {71..76}; do
cat << EOF >> ${cloud_netplan_cfg}
- ${PUBLIC_NODE_IP_ADDRESS%.*}.${i}/${PUBLIC_NODE_IP_NETMASK}
EOF
done
fi
# Remove Tunnel interface from netplan
if [[ $TUNNEL_INTERFACE_NETPLAN_MANAGE == false ]]; then
sed -i "/ ${TUNNEL_INTERFACE}/,/ set-name: ${TUNNEL_INTERFACE}/d" ${cloud_netplan_cfg}
fi
if [[ -n ${IRONIC_BAREMETAL_INTERFACE} ]]; then
cat << EOF >> ${cloud_netplan_cfg}
br-baremetal:
dhcp4: false
interfaces:
- veth-bm-br
- veth-vbmc-br
EOF
if [[ "${IRONIC_MT_ENABLED,,}" != true ]] ; then
# Without multitenancy the physical baremetal interface and its address go on the bridge.
cat << EOF >> ${cloud_netplan_cfg}
- ${IRONIC_BAREMETAL_INTERFACE}
addresses:
- ${IRONIC_BAREMETAL_INTERFACE_IP}/${IRONIC_BAREMETAL_NETWORK_NETMASK}
EOF
else
# With multitenancy the bridge gets the vxlan device and the PXE veth peer;
# the vxlan link itself is created by a oneshot systemd unit.
cat << EOF >> ${cloud_netplan_cfg}
- ${IRONIC_BAREMETAL_VXLAN_INTERFACE}
- veth-pxe-br
EOF
cat << EOF > /etc/systemd/system/ironic-vxlan-tunnel.service
[Unit]
Description=Ironic VXLAN tunnel
After=network.target
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/sbin/ip link add ${IRONIC_BAREMETAL_VXLAN_INTERFACE} type vxlan id 10 group 239.1.1.10 dstport 0 dev ${IRONIC_BAREMETAL_INTERFACE}
ExecStart=/sbin/ip link set ${IRONIC_BAREMETAL_VXLAN_INTERFACE} up
[Install]
WantedBy=multi-user.target
EOF
systemctl enable ironic-vxlan-tunnel.service
systemctl start ironic-vxlan-tunnel.service
fi
fi
netplan --debug apply
# NOTE(vsaienko): give some time to apply changes
sleep 15
# The tunnel interface was removed from netplan above: flush its addresses and bring the link up.
if [[ $TUNNEL_INTERFACE_NETPLAN_MANAGE == false ]]; then
ip addr flush ${TUNNEL_INTERFACE}
ip link set ${TUNNEL_INTERFACE} up
fi
}
# NOTE(review): the placeholder on the next line is presumably replaced with
# function overrides by whatever renders this file - confirm with the caller.
# As plain shell it expands an (auto-exported) variable and runs the result.
$functions_override
set_node_labels() {
  # Patch this node's Kubernetes object so that its "metadata" equals
  # NODE_METADATA (compacted with jq), retrying up to 10 times.
  set_node_labels_retry() {
    local compact_metadata
    local patch
    # As before, a jq failure only results in an empty metadata value.
    compact_metadata=$(echo $NODE_METADATA | jq -c ".") || true
    patch="{\"metadata\": ${compact_metadata}}"
    ${_KUBECTL_CMD} patch node $(hostname) -p "${patch}"
  }
  retry 10 "Labeling node failed" set_node_labels_retry
}
# Hardware metadata as a JSON object, empty by default: collect_ceph_metadata
# replaces it and collect_interfaces_metadata merges an "interfaces" key into it.
HW_METADATA='{}'
prepare_metadata_files() {
  # Run prepare-metadata.py against the lab metadata file.
  local metadata_file=/usr/share/metadata/lab-metadata.yaml
  /usr/sbin/prepare-metadata.py --metadata-file "${metadata_file}"
}
collect_ceph_metadata() {
  # When this host carries the role=ceph-osd-node label and a ceph.yaml
  # metadata file exists, store the base64-encoded file in HW_METADATA under
  # "ceph" and wipe the partition table of the drive named in that file.
  local ceph_yaml=/usr/share/metadata/ceph.yaml
  local ceph_osd_node
  ceph_osd_node=$(${_KUBECTL_CMD} get nodes -l role=ceph-osd-node -o jsonpath={.items[?\(@.metadata.name==\"$(hostname)\"\)].metadata.name})
  if [[ ! -f /usr/share/metadata/ceph.yaml || ! ${ceph_osd_node} ]]; then
    return 0
  fi
  HW_METADATA="{\"ceph\": {\"$(hostname)\": \"$(base64 -w 0 ${ceph_yaml})\"}}"
  # Third field of the "- name: vd..." line.
  ceph_store_drive=$(grep -E '\- name\: vd?' ${ceph_yaml} | awk '{print $3}')
  if [[ -b /dev/${ceph_store_drive} ]]; then
    sgdisk --zap-all /dev/${ceph_store_drive}
  fi
}
collect_interfaces_metadata() {
  # Describe the tunnel interface (PCI id, address, MAC) in interfaces.yaml
  # and merge the base64-encoded file into HW_METADATA under "interfaces".
  local if_metadata_file="/usr/share/metadata/interfaces.yaml"
  if [[ -n "${TUNNEL_INTERFACE}" ]]; then
    # bus-info as reported by ethtool is used as the PCI id.
    pci_id=$(ethtool -i ${TUNNEL_INTERFACE} | awk '/bus-info/ {print $2}')
    mac=$(cat $(find /sys/bus/pci/devices/${pci_id}/ -name net)/${TUNNEL_INTERFACE}/address)
    ip=${TUNNEL_INTERFACE_IP}/${TUNNEL_INTERFACE_NETWORK_NETMASK}
    cat > ${if_metadata_file} << EOF
${TUNNEL_INTERFACE}:
pci_id: "${pci_id}"
ip_addr: "${ip}"
mac: "${mac}"
EOF
  else
    echo "Tunnel interface is empty"
  fi
  HW_METADATA=$(echo ${HW_METADATA} | jq -cr ". += {\"interfaces\": {\"$(hostname)\": \"$(base64 -w 0 ${if_metadata_file})\"}}")
}
nested_virt_config() {
  # When /proc/cpuinfo mentions "vmx", write the kvm_intel module options
  # (nested, enable_apicv, ept) and reload the module with nested=1.
  if ! grep --color vmx /proc/cpuinfo; then
    echo "Nested virtualization is not supported"
    return 0
  fi
  printf '%s\n' \
    'options kvm_intel nested=1' \
    'options kvm_intel enable_apicv=1' \
    'options kvm_intel ept=1' > /etc/modprobe.d/qemu-system-x86.conf
  # Unloading may fail; that is tolerated.
  modprobe -r kvm_intel || true
  modprobe kvm_intel nested=1
}
configure_huge_pages() {
  # Append huge-page kernel parameters to GRUB_CMDLINE_LINUX in
  # /etc/default/grub for every page size that is requested and not configured
  # yet. When anything was added, regenerate the grub config and run
  # `cloud-init clean -r`.
  local grub_defaults=/etc/default/grub
  local nr_2Mb
  local apply_grub=false
  if [[ ! $(grep -E ^GRUB_CMDLINE_LINUX=".*hugepagesz=2M.*" ${grub_defaults}) && "${HUGE_PAGES_2Mb_SIZE}" -gt 0 ]]; then
    # The 2Mb value is halved to get the number of pages.
    nr_2Mb=$(( ${HUGE_PAGES_2Mb_SIZE} / 2 ))
    echo "Configure GRUB kernel cmd to allocate ${nr_2Mb} 2Mb huge pages"
    echo "GRUB_CMDLINE_LINUX=\"\$GRUB_CMDLINE_LINUX hugepagesz=2M hugepages=$nr_2Mb\"" >> ${grub_defaults}
    apply_grub=true
  fi
  if [[ ! $(grep -E ^GRUB_CMDLINE_LINUX=".*hugepagesz=1G.*" ${grub_defaults}) && "${HUGE_PAGES_1G_SIZE}" -gt 0 ]]; then
    echo "Configure GRUB kernel cmd to allocate ${HUGE_PAGES_1G_SIZE} 1G huge pages"
    echo "GRUB_CMDLINE_LINUX=\"\$GRUB_CMDLINE_LINUX hugepagesz=1G hugepages=${HUGE_PAGES_1G_SIZE}\"" >> ${grub_defaults}
    apply_grub=true
  fi
  if [[ "${apply_grub}" != true ]]; then
    echo "GRUB already configured with huge pages"
    return 0
  fi
  update-grub
  cloud-init clean -r
}
disable_master_taint() {
  # Enable "Allow all authenticated users, including service accounts to
  # schedule on all nodes, including UCP managers." through the UCP API,
  # retrying up to 10 times.
  disable_master_taint_retry() {
    local grant_url=https://${UCP_MASTER_HOST}/collectionGrants/authenticated/swarm/scheduler
    curl --retry 6 --retry-delay 5 -k -H "Authorization: Bearer $AUTHTOKEN" -k -X PUT ${grant_url}
  }
  retry 10 "Can't enable permission to schedule all pods on UCP master." disable_master_taint_retry
}
disable_ipv6() {
  # Persist and apply disable_ipv6=1 for the "all", "default" and "lo" entries.
  local sysctl_file=/etc/sysctl.d/11-ipv6-disable.conf
  printf '%s\n' \
    'net.ipv6.conf.all.disable_ipv6 = 1' \
    'net.ipv6.conf.default.disable_ipv6 = 1' \
    'net.ipv6.conf.lo.disable_ipv6 = 1' > "${sysctl_file}"
  sysctl -p "${sysctl_file}"
}
# Generate /etc/netplan/60-evpn.yaml with one vxlan<vni> device and one
# br<vni> bridge per VNI produced by `seq ${FRR_EVPN_TUNNELS_RANGE}`, create a
# oneshot systemd unit per VNI that adds the vxlan link, and apply netplan.
# Reads: FRR_EVPN_TUNNELS_RANGE, TUNNEL_INTERFACE_IP, FRR_EVPN_VXLAN_DST_PORT.
function setup_evpn {
# File header; the per-VNI ethernets entries are appended by the loop below.
cat << EOF > /etc/netplan/60-evpn.yaml
network:
version: 2
ethernets:
EOF
for vni in $(seq ${FRR_EVPN_TUNNELS_RANGE}); do
cat << EOF >> /etc/netplan/60-evpn.yaml
vxlan$vni: {}
EOF
done
cat << EOF >> /etc/netplan/60-evpn.yaml
bridges:
EOF
# One bridge per VNI holding the matching vxlan device, addressed 192.168.<vni>.254/24.
for vni in $(seq ${FRR_EVPN_TUNNELS_RANGE}); do
cat << EOF >> /etc/netplan/60-evpn.yaml
br$vni:
dhcp4: false
link-local: []
parameters:
stp: false
interfaces:
- vxlan$vni
addresses:
- 192.168.$vni.254/24
EOF
done
# The vxlan links are created by systemd units, which are enabled and started right away.
for vni in $(seq ${FRR_EVPN_TUNNELS_RANGE}); do
cat << EOF > /etc/systemd/system/evpn-vxlan-tunnel-${vni}.service
[Unit]
Description=EVPN tunnel ${vni}
After=network.target
[Service]
Type=oneshot
RemainAfterExit=true
ExecStart=/sbin/ip link add vxlan${vni} type vxlan id ${vni} local ${TUNNEL_INTERFACE_IP} nolearning dstport ${FRR_EVPN_VXLAN_DST_PORT}
ExecStart=/sbin/ip link set vxlan${vni} up
[Install]
WantedBy=multi-user.target
EOF
systemctl enable evpn-vxlan-tunnel-${vni}.service
systemctl start evpn-vxlan-tunnel-${vni}.service
done
netplan --debug apply
# NOTE(vsaienko): give some time to apply changes
sleep 15
}
# Install FRR from the Mirantis mirror, enable bgpd, write a BGP/EVPN
# configuration to /etc/frr/frr.conf and restart the service. Afterwards IPv6
# is disabled and the EVPN tunnels are set up via disable_ipv6 and setup_evpn.
# Reads: OS_CODENAME, FRR_BGP_AS_NUMBER, FRR_ROUTER_ID, FRR_BGP_SOURCE_ADDRESS,
#        FRR_BGP_NEIGHBORS (comma-separated neighbor addresses).
function install_frr {
# Adds the repository key and source, then installs the packages; the whole
# step is attempted up to 3 times.
function _install_frr_packages {
FRRVER="frr-stable"
curl -s https://mirror.mirantis.com/.snapshots/frr-stable-latest/keys.asc | sudo apt-key add -
add-apt-repository "deb [arch=amd64] https://mirror.mirantis.com/.snapshots/frr-stable-latest/ ${OS_CODENAME} ${FRRVER}"
install_pkgs frr frr-pythontools
}
retry 3 "Can't install frr packages" _install_frr_packages
# Switch the BGP daemon on in the FRR daemons file.
sed -i 's/^bgpd=no/bgpd=yes/g' /etc/frr/daemons
# Global settings and the "rr" peer group.
cat << EOF > /etc/frr/frr.conf
log syslog informational
no ipv6 forwarding
service integrated-vtysh-config
!
router bgp ${FRR_BGP_AS_NUMBER}
bgp router-id ${FRR_ROUTER_ID}
no bgp default ipv4-unicast
bgp cluster-id ${FRR_ROUTER_ID}
coalesce-time 1000
neighbor rr peer-group
neighbor rr capability extended-nexthop
neighbor rr remote-as ${FRR_BGP_AS_NUMBER}
neighbor rr update-source ${FRR_BGP_SOURCE_ADDRESS}
EOF
# Every configured neighbor joins the "rr" peer group.
for neighbor_ip in $(echo $FRR_BGP_NEIGHBORS | tr "," "\n"); do
cat << EOF >> /etc/frr/frr.conf
neighbor $neighbor_ip peer-group rr
EOF
done
# Address families activated for the peer group.
cat << EOF >> /etc/frr/frr.conf
!
address-family ipv4 unicast
neighbor rr activate
exit-address-family
!
address-family ipv4 vpn
neighbor rr activate
exit-address-family
!
address-family l2vpn evpn
neighbor rr activate
advertise-all-vni
advertise-default-gw
exit-address-family
!
line vty
!
EOF
systemctl restart frr
disable_ipv6
setup_evpn
}
cron_disable_calico_offloading() {
  # Workaround: install a helper script plus a per-minute cron entry that turn
  # rx/tx checksum offloading off on the vxlan.calico interface whenever it is
  # found enabled. Nothing is done when the cron file already exists.
  local cron_file=/etc/cron.d/disable_calico_offloading
  if [ -f "${cron_file}" ]; then
    echo "Cronjob for disable callico offloading already exists"
    return 0
  fi
  script_path="/usr/sbin/calico_disable_offloading.sh"
  cat >> $script_path << 'EOF'
#!/usr/bin/env bash
if /sbin/ethtool --show-offload vxlan.calico | grep -E '^.x-checksumming:' | grep -q ': on'; then
/sbin/ethtool --offload vxlan.calico rx off tx off > /dev/null
fi
EOF
  chmod +x $script_path
  # Output of the helper goes to syslog through logger.
  printf '%s\n' "* * * * * root $script_path 2>&1 | /usr/bin/logger -t calico_disable_offloading" >> "${cron_file}"
}
increase_iscsi_timeout() {
  # Install a udev rule that writes 120 to the device timeout of added SCSI
  # devices, then reload the rules and trigger udev.
  local rules_file=/etc/udev/rules.d/50-iscsi-timeout.rules
  cat > "${rules_file}" <<'EOF'
ACTION=="add", SUBSYSTEM=="scsi" , SYSFS{type}=="0|7|14",
RUN+="/bin/sh -c 'echo 120 > /sys/block/%k/device/timeout'"
EOF
  udevadm control --reload-rules && udevadm trigger
}