Skip to content

Commit

Permalink
Disable systemd-resolved service on 18.04 (Azure#579)
Browse files Browse the repository at this point in the history
* Disable resolved for 1804

* Add unit test

* update gomod

* revert change to go.mod
  • Loading branch information
Tony Xu authored Feb 13, 2021
1 parent 689062f commit 05c24aa
Show file tree
Hide file tree
Showing 93 changed files with 6,123 additions and 50 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,4 @@ require (
k8s.io/apimachinery v0.0.0-20190221213512-86fb29eff628
k8s.io/client-go v10.0.0+incompatible
k8s.io/klog v1.0.0 // indirect
)
)
17 changes: 17 additions & 0 deletions parts/linux/cloud-init/artifacts/cse_config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -288,6 +288,23 @@ configureCNIIPTables() {
fi
}

# disable1804SystemdResolved points /etc/resolv.conf at the resolv.conf file
# written by systemd-resolved (/run/systemd/resolve/resolv.conf) on Ubuntu 18.04.
# The body is selected at template-render time by the Disable1804SystemdResolved
# template function:
#   - true:  on 18.04 only, replace /etc/resolv.conf with a symlink to the
#            systemd-resolved generated file (when that file exists).
#   - false: only log the current state and a "Skipping" message.
# In both cases the current /etc/resolv.conf link target and contents are logged
# first so the before/after state is visible in the provisioning log.
disable1804SystemdResolved() {
    ls -ltr /etc/resolv.conf
    cat /etc/resolv.conf
    {{- if Disable1804SystemdResolved}}
    UBUNTU_RELEASE=$(lsb_release -r -s)
    if [[ ${UBUNTU_RELEASE} == "18.04" ]]; then
        echo "Ignoring systemd-resolved query service but using its resolv.conf file"
        echo "This is the simplest approach to work around resolved issues without completely uninstalling it"
        if [ -f /run/systemd/resolve/resolv.conf ]; then
            sudo ln -sf /run/systemd/resolve/resolv.conf /etc/resolv.conf
        else
            # Previously this case was skipped silently; log it so a missing
            # source file is visible when debugging DNS problems on the node.
            echo "/run/systemd/resolve/resolv.conf not found. Leaving /etc/resolv.conf unchanged."
        fi
        # Log the state again so the effect of the relink can be verified.
        ls -ltr /etc/resolv.conf
        cat /etc/resolv.conf
    fi
    {{- else}}
    echo "Disable1804SystemdResolved is false. Skipping."
    {{- end}}
}

{{- if NeedsContainerd}}
ensureContainerd() {
{{- if TeleportEnabled}}
Expand Down
2 changes: 2 additions & 0 deletions parts/linux/cloud-init/artifacts/cse_main.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if [ -f /opt/azure/containers/provision.complete ]; then
exit 0
fi

disable1804SystemdResolved

UBUNTU_RELEASE=$(lsb_release -r -s)
if [[ ${UBUNTU_RELEASE} == "16.04" ]]; then
sudo apt-get -y autoremove chrony
Expand Down
3 changes: 3 additions & 0 deletions pkg/agent/baker.go
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,9 @@ func getContainerServiceFuncMap(config *datamodel.NodeBootstrappingConfiguration
cs := config.ContainerService
profile := config.AgentPoolProfile
return template.FuncMap{
"Disable1804SystemdResolved": func() bool {
return config.Disable1804SystemdResolved
},
"IsIPMasqAgentEnabled": func() bool {
return cs.Properties.IsIPMasqAgentEnabled()
},
Expand Down
37 changes: 36 additions & 1 deletion pkg/agent/baker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,43 @@ var _ = Describe("Assert generated customData and cseCmd", func() {
TransparentHugePageDefrag: "defer+madvise",
SwapFileSizeMB: &swapFileSizeMB,
}
}))
}),

Entry("AKSUbuntu1604 with Disable1804SystemdResolved=true", "AKSUbuntu1604+Disable1804SystemdResolved=true", "1.16.13", func(config *datamodel.NodeBootstrappingConfiguration) {
config.Disable1804SystemdResolved = true
config.ContainerService.Properties.AgentPoolProfiles[0].KubernetesConfig = &datamodel.KubernetesConfig{
KubeletConfig: map[string]string{},
ContainerRuntime: datamodel.Docker,
}
config.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NC6"
}),

Entry("AKSUbuntu1604 with Disable1804SystemdResolved=false", "AKSUbuntu1604+Disable1804SystemdResolved=false", "1.16.13", func(config *datamodel.NodeBootstrappingConfiguration) {
config.Disable1804SystemdResolved = false
config.ContainerService.Properties.AgentPoolProfiles[0].KubernetesConfig = &datamodel.KubernetesConfig{
KubeletConfig: map[string]string{},
ContainerRuntime: datamodel.Docker,
}
config.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NC6"
}),

Entry("AKSUbuntu1804 with Disable1804SystemdResolved=true", "AKSUbuntu1804+Disable1804SystemdResolved=true", "1.19.13", func(config *datamodel.NodeBootstrappingConfiguration) {
config.Disable1804SystemdResolved = true
config.ContainerService.Properties.AgentPoolProfiles[0].KubernetesConfig = &datamodel.KubernetesConfig{
KubeletConfig: map[string]string{},
ContainerRuntime: datamodel.Containerd,
}
config.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NC6"
}),

Entry("AKSUbuntu1804 with Disable1804SystemdResolved=false", "AKSUbuntu1804+Disable1804SystemdResolved=false", "1.19.13", func(config *datamodel.NodeBootstrappingConfiguration) {
config.Disable1804SystemdResolved = false
config.ContainerService.Properties.AgentPoolProfiles[0].KubernetesConfig = &datamodel.KubernetesConfig{
KubeletConfig: map[string]string{},
ContainerRuntime: datamodel.Containerd,
}
config.ContainerService.Properties.AgentPoolProfiles[0].VMSize = "Standard_NC6"
}))
})

var _ = Describe("Assert generated customData and cseCmd for Windows", func() {
Expand Down
1 change: 1 addition & 0 deletions pkg/agent/datamodel/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -1436,6 +1436,7 @@ type NodeBootstrappingConfiguration struct {
ResourceGroupName string
UserAssignedIdentityClientID string
ConfigGPUDriverIfNeeded bool
Disable1804SystemdResolved bool
EnableGPUDevicePluginIfNeeded bool
EnableKubeletConfigFile bool
EnableNvidia bool
Expand Down

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ if [ -f /opt/azure/containers/provision.complete ]; then
exit 0
fi

disable1804SystemdResolved

UBUNTU_RELEASE=$(lsb_release -r -s)
if [[ ${UBUNTU_RELEASE} == "16.04" ]]; then
sudo apt-get -y autoremove chrony
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,12 @@ configureCNIIPTables() {
/sbin/ebtables -t nat --list
fi
}

# disable1804SystemdResolved, as rendered here, makes no changes: it lists
# /etc/resolv.conf, prints its contents, and logs that the
# Disable1804SystemdResolved setting is false.
disable1804SystemdResolved() {
ls -ltr /etc/resolv.conf
cat /etc/resolv.conf
echo "Disable1804SystemdResolved is false. Skipping."
}
ensureDocker() {
DOCKER_SERVICE_EXEC_START_FILE=/etc/systemd/system/docker.service.d/exec_start.conf
wait_for_file 1200 1 $DOCKER_SERVICE_EXEC_START_FILE || exit $ERR_FILE_WATCH_TIMEOUT
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
echo $(date),$(hostname) > /var/log/azure/cluster-provision-cse-output.log; retrycmd_if_failure() { r=$1; w=$2; t=$3; shift && shift && shift; for i in $(seq 1 $r); do timeout $t ${@}; [ $? -eq 0 ] && break || if [ $i -eq $r ]; then return 1; else sleep $w; fi; done }; ERR_OUTBOUND_CONN_FAIL=50; retrycmd_if_failure 150 1 3 nc -vz mcr.microsoft.com 443 >> /var/log/azure/cluster-provision-cse-output.log 2>&1 || exit $ERR_OUTBOUND_CONN_FAIL; for i in $(seq 1 1200); do grep -Fq "EOF" /opt/azure/containers/provision.sh && break; if [ $i -eq 1200 ]; then exit 100; else sleep 1; fi; done; ADMINUSER=azureuser MOBY_VERSION= TENANT_ID=tenantID KUBERNETES_VERSION=1.16.13 HYPERKUBE_URL=hyperkube-amd64:v1.16.13 KUBE_BINARY_URL= KUBEPROXY_URL= APISERVER_PUBLIC_KEY= SUBSCRIPTION_ID=subID RESOURCE_GROUP=resourceGroupName LOCATION=southcentralus VM_TYPE=vmss SUBNET=subnet1 NETWORK_SECURITY_GROUP=aks-agentpool-36873793-nsg VIRTUAL_NETWORK=aks-vnet-07752737 VIRTUAL_NETWORK_RESOURCE_GROUP=MC_rg ROUTE_TABLE=aks-agentpool-36873793-routetable PRIMARY_AVAILABILITY_SET= PRIMARY_SCALE_SET=aks-agent2-36873793-vmss SERVICE_PRINCIPAL_CLIENT_ID=ClientID SERVICE_PRINCIPAL_CLIENT_SECRET='Secret' KUBELET_PRIVATE_KEY= NETWORK_PLUGIN= NETWORK_POLICY= VNET_CNI_PLUGINS_URL=https://acs-mirror.azureedge.net/azure-cni/v1.1.3/binaries/azure-vnet-cni-linux-amd64-v1.1.3.tgz CNI_PLUGINS_URL=https://acs-mirror.azureedge.net/cni/cni-plugins-amd64-v0.7.6.tgz CLOUDPROVIDER_BACKOFF=<nil> CLOUDPROVIDER_BACKOFF_MODE= CLOUDPROVIDER_BACKOFF_RETRIES=0 CLOUDPROVIDER_BACKOFF_EXPONENT=0 CLOUDPROVIDER_BACKOFF_DURATION=0 CLOUDPROVIDER_BACKOFF_JITTER=0 CLOUDPROVIDER_RATELIMIT=<nil> CLOUDPROVIDER_RATELIMIT_QPS=0 CLOUDPROVIDER_RATELIMIT_QPS_WRITE=0 CLOUDPROVIDER_RATELIMIT_BUCKET=0 CLOUDPROVIDER_RATELIMIT_BUCKET_WRITE=0 LOAD_BALANCER_DISABLE_OUTBOUND_SNAT=<nil> USE_MANAGED_IDENTITY_EXTENSION=false USE_INSTANCE_METADATA=false LOAD_BALANCER_SKU= EXCLUDE_MASTER_FROM_STANDARD_LB=true MAXIMUM_LOADBALANCER_RULE_COUNT=0 
CONTAINER_RUNTIME=docker CLI_TOOL=docker CONTAINERD_DOWNLOAD_URL_BASE=https://storage.googleapis.com/cri-containerd-release/ NETWORK_MODE= KUBE_BINARY_URL= USER_ASSIGNED_IDENTITY_ID=userAssignedID API_SERVER_NAME= IS_VHD=true GPU_NODE=false SGX_NODE=false AUDITD_ENABLED=false CONFIG_GPU_DRIVER_IF_NEEDED=true ENABLE_GPU_DEVICE_PLUGIN_IF_NEEDED=false TELEPORTD_PLUGIN_DOWNLOAD_URL= /usr/bin/nohup /bin/bash -c "/bin/bash /opt/azure/containers/provision_start.sh"

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Wrapper around provision.sh: runs it, captures its exit code, and prints a
# JSON summary ({ExitCode, Output, Error}) on stdout before exiting with the
# same code.

# Run the provisioning script; its stdout and stderr are appended to the provision log.
/bin/bash /opt/azure/containers/provision.sh >> /var/log/azure/cluster-provision.log 2>&1
EXIT_CODE=$?

# Append the kubelet unit status to the CSE output log, then keep only the
# first 30 lines of that log for the summary.
systemctl --no-pager -l status kubelet >> /var/log/azure/cluster-provision-cse-output.log 2>&1
OUTPUT=$(head -n 30 /var/log/azure/cluster-provision-cse-output.log)

# Build the summary object; the Error field is always the empty string here.
JSON_STRING=$(jq -n --arg ec "$EXIT_CODE" --arg op "$OUTPUT" --arg er "" '{ExitCode: $ec, Output: $op, Error: $er}')

# NOTE(review): the expansion is left unquoted, so the shell word-splits jq's
# multi-line output and echo prints it on a single line — presumably what the
# consumer expects; confirm before quoting.
echo $JSON_STRING
exit $EXIT_CODE
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
#!/bin/bash
# Bootstrap section of the node provisioning script: exits early if a previous
# run already completed, applies the Ubuntu 16.04 time-sync adjustment, then
# waits for and sources the helper, install, and config scripts that define the
# functions used by the rest of this file.
ERR_FILE_WATCH_TIMEOUT=6
set -x
if [ -f /opt/azure/containers/provision.complete ]; then
    echo "Already ran to success exiting..."
    exit 0
fi

UBUNTU_RELEASE=$(lsb_release -r -s)
if [[ ${UBUNTU_RELEASE} == "16.04" ]]; then
    sudo apt-get -y autoremove chrony
    echo $?
    sudo systemctl restart systemd-timesyncd
fi

echo $(date),$(hostname), startcustomscript>>/opt/m

# Poll (up to 3600 times, one second apart) until provision_source.sh is
# non-empty and contains its end-of-helpers marker; wait_for_file cannot be
# used yet because no helper script has been sourced at this point.
for i in $(seq 1 3600); do
    if [ -s /opt/azure/containers/provision_source.sh ]; then
        grep -Fq '#HELPERSEOF' /opt/azure/containers/provision_source.sh && break
    fi
    if [ $i -eq 3600 ]; then
        exit $ERR_FILE_WATCH_TIMEOUT
    else
        sleep 1
    fi
done
# Drop the marker line before sourcing the helpers.
sed -i "/#HELPERSEOF/d" /opt/azure/containers/provision_source.sh
source /opt/azure/containers/provision_source.sh

wait_for_file 3600 1 /opt/azure/containers/provision_installs.sh || exit $ERR_FILE_WATCH_TIMEOUT
source /opt/azure/containers/provision_installs.sh

wait_for_file 3600 1 /opt/azure/containers/provision_configs.sh || exit $ERR_FILE_WATCH_TIMEOUT
source /opt/azure/containers/provision_configs.sh

# disable1804SystemdResolved is not defined in this file, so it can only come
# from one of the scripts sourced above. It was previously invoked before any
# of them had been sourced, which makes bash report "command not found" and
# skip the step; call it only once the definitions are loaded.
disable1804SystemdResolved

# Main provisioning sequence. The functions called below are not defined in
# this file; they are expected to come from the scripts sourced earlier.

# Command tracing is switched off while the etcd peer certificate and key are
# extracted (presumably to keep key material out of the trace log — confirm).
# Each value is the text between '[' and ']' of the source variable, split on
# ',' and indexed by NODE_INDEX+1.
set +x
ETCD_PEER_CERT=$(echo ${ETCD_PEER_CERTIFICATES} | cut -d'[' -f 2 | cut -d']' -f 1 | cut -d',' -f $((${NODE_INDEX}+1)))
ETCD_PEER_KEY=$(echo ${ETCD_PEER_PRIVATE_KEYS} | cut -d'[' -f 2 | cut -d']' -f 1 | cut -d',' -f $((${NODE_INDEX}+1)))
set -x

# On CoreOS, kubectl is taken from /opt/kubectl.
if [[ $OS == $COREOS_OS_NAME ]]; then
echo "Changing default kubectl bin location"
KUBECTL=/opt/kubectl
fi

# Remember whether a reboot was already pending before provisioning; the flag
# is consumed near the end of the script.
if [ -f /var/run/reboot-required ]; then
REBOOTREQUIRED=true
else
REBOOTREQUIRED=false
fi

configureAdminUser
cleanUpContainerd


# GPU driver cleanup runs on every node that is not flagged as a GPU node.
if [[ "${GPU_NODE}" != "true" ]]; then
cleanUpGPUDrivers
fi

# The presence of this marker file decides whether a full install is needed.
# If IS_VHD is true but the marker is missing, provisioning aborts.
VHD_LOGS_FILEPATH=/opt/azure/vhd-install.complete
if [ -f $VHD_LOGS_FILEPATH ]; then
echo "detected golden image pre-install"
cleanUpContainerImages
FULL_INSTALL_REQUIRED=false
else
if [[ "${IS_VHD}" = true ]]; then
echo "Using VHD distro but file $VHD_LOGS_FILEPATH not found"
exit $ERR_VHD_FILE_NOT_FOUND
fi
FULL_INSTALL_REQUIRED=true
fi

# Dependencies are installed only on Ubuntu when a full install is required.
if [[ $OS == $UBUNTU_OS_NAME ]] && [ "$FULL_INSTALL_REQUIRED" = "true" ]; then
installDeps
else
echo "Golden image; skipping dependencies installation"
fi

if [[ $OS == $UBUNTU_OS_NAME ]]; then
ensureAuditD
fi

# Install steps: container runtime, network plugin, Kubernetes binaries.
installContainerRuntime

installNetworkPlugin

installKubeletKubectlAndKubeProxy

if [[ $OS != $COREOS_OS_NAME ]]; then
ensureRPC
fi

# Configuration steps, followed by the ensure* steps for the node services.
createKubeManifestDir

configureK8s

configureCNI


ensureDocker

ensureMonitorService

ensureSysctl
ensureKubelet
ensureJournal
ensureUpdateNodeLabels

# On a full Ubuntu install, write the id below to the hv_util driver's unbind
# file now and insert the same command at line 13 of /etc/rc.local.
# NOTE(review): the meaning of this id is not visible here — confirm.
if $FULL_INSTALL_REQUIRED; then
if [[ $OS == $UBUNTU_OS_NAME ]]; then

echo 2dd1ce17-079e-403c-b352-a1921ee207ee > /sys/bus/vmbus/drivers/hv_util/unbind
sed -i "13i\echo 2dd1ce17-079e-403c-b352-a1921ee207ee > /sys/bus/vmbus/drivers/hv_util/unbind\n" /etc/rc.local
fi
fi
rm -f /etc/apt/apt.conf.d/99periodic

# apache2-utils is purged in the background on Ubuntu.
# NOTE(review): 20/30/120 are presumably retries, wait and timeout — confirm
# against the apt_get_purge helper.
if [[ $OS == $UBUNTU_OS_NAME ]]; then
apt_get_purge 20 30 120 apache2-utils &
fi

# API server reachability check. The result is recorded in VALIDATION_ERR and
# used as this script's exit code; a failure here does not stop the remaining
# steps from running.
VALIDATION_ERR=0

# Names containing ".privatelink." get a larger retry budget for the DNS
# lookup (200 instead of 20).
API_SERVER_DNS_RETRIES=20
if [[ $API_SERVER_NAME == *.privatelink.* ]]; then
API_SERVER_DNS_RETRIES=200
fi
RES=$(retrycmd_if_failure ${API_SERVER_DNS_RETRIES} 1 3 nslookup ${API_SERVER_NAME})
STS=$?
if [[ $STS != 0 ]]; then
# A failed lookup whose output mentions 168.63.129.16 is reported with the
# Azure-DNS-specific error code; any other failure uses the generic one.
if [[ $RES == *"168.63.129.16"* ]]; then
VALIDATION_ERR=$ERR_K8S_API_SERVER_AZURE_DNS_LOOKUP_FAIL
else
VALIDATION_ERR=$ERR_K8S_API_SERVER_DNS_LOOKUP_FAIL
fi
else
# DNS resolved: check that port 443 accepts connections, again with a
# larger retry budget for ".privatelink." names (100 instead of 50).
API_SERVER_CONN_RETRIES=50
if [[ $API_SERVER_NAME == *.privatelink.* ]]; then
API_SERVER_CONN_RETRIES=100
fi
retrycmd_if_failure ${API_SERVER_CONN_RETRIES} 1 3 nc -vz ${API_SERVER_NAME} 443 || VALIDATION_ERR=$ERR_K8S_API_SERVER_CONN_FAIL
fi

# If a reboot was pending at start, schedule it for one minute from now.
# Otherwise, on Ubuntu, start apt.systemd.daily in the background. On Ubuntu
# both paths also run aptmarkWALinuxAgent unhold in the background.
if $REBOOTREQUIRED; then
echo 'reboot required, rebooting node in 1 minute'
/bin/bash -c "shutdown -r 1 &"
if [[ $OS == $UBUNTU_OS_NAME ]]; then
aptmarkWALinuxAgent unhold &
fi
else
if [[ $OS == $UBUNTU_OS_NAME ]]; then
/usr/lib/apt/apt.systemd.daily &
aptmarkWALinuxAgent unhold &
fi
fi

# Log the result, create the completion marker checked at the top of the
# script, snapshot the process tree in the background, and exit with the
# validation result.
echo "Custom script finished. API server connection check code:" $VALIDATION_ERR
echo $(date),$(hostname), endcustomscript>>/opt/m
mkdir -p /opt/azure/containers && touch /opt/azure/containers/provision.complete
ps auxfww > /opt/azure/provision-ps.log &

exit $VALIDATION_ERR

#EOF
Loading

0 comments on commit 05c24aa

Please sign in to comment.