CKS: create HA cluster with 3 control VMs instead of 2 (#8297)

This PR fixes the test failures in the CKS HA-cluster upgrade.
In production, a CKS HA cluster should also have at least 3 control VMs.
The etcd cluster needs at least 3 members for reliable HA: the etcd daemon on the control VMs uses the Raft consensus protocol to elect a leader, and Raft requires a majority of members to remain available. With only 2 control VMs the quorum is still 2, so taking any node down during a CKS HA upgrade makes etcd unavailable; with 3 control VMs one node can be drained at a time while quorum is preserved.
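
As a quick illustration of the quorum arithmetic behind this (not part of the change itself), the sketch below computes the Raft majority and the number of tolerated failures for 2 versus 3 etcd members:

# Raft/etcd quorum arithmetic: a cluster of n members needs a majority
# of floor(n/2) + 1 to keep serving writes; the remaining members may be
# down (e.g. drained during an upgrade) without losing the cluster.
def quorum(n):
    return n // 2 + 1

for n in (2, 3):
    print("%d members: quorum=%d, tolerated failures=%d" % (n, quorum(n), n - quorum(n)))

# 2 members: quorum=2, tolerated failures=0  -> upgrading any control VM breaks etcd
# 3 members: quorum=2, tolerated failures=1  -> one control VM can be upgraded at a time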
Wei Zhou, 2023-12-09 07:03:05 +01:00, committed by GitHub
parent 231a9eae2e
commit fc44df7c95
4 changed files with 8 additions and 8 deletions


@@ -91,7 +91,7 @@ public class KubernetesClusterUpgradeWorker extends KubernetesClusterActionWorke
             }
             try {
                 result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
-                        String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-local-data", hostName),
+                        String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
                         10000, 10000, 60000);
             } catch (Exception e) {
                 logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);


@@ -137,7 +137,7 @@ if [ -d "$BINARIES_DIR" ]; then
   systemctl stop kubelet
   cp -a ${BINARIES_DIR}/k8s/{kubelet,kubectl} /opt/bin
-  chmod +x {kubelet,kubectl}
+  chmod +x /opt/bin/{kubelet,kubectl}
   systemctl daemon-reload
   systemctl restart containerd


@@ -526,7 +526,7 @@ class TestKubernetesCluster(cloudstackTestCase):
     @attr(tags=["advanced", "smoke"], required_hardware="true")
     @skipTestIf("hypervisorNotSupported")
     def test_07_deploy_kubernetes_ha_cluster(self):
-        """Test to deploy a new Kubernetes cluster
+        """Test to deploy a new HA Kubernetes cluster
         # Validate the following:
         # 1. createKubernetesCluster should return valid info for new cluster
@@ -537,14 +537,14 @@ class TestKubernetesCluster(cloudstackTestCase):
         if self.default_network:
             self.skipTest("HA cluster on shared network requires external ip address, skipping it")
         global k8s_cluster
-        k8s_cluster = self.getValidKubernetesCluster(1, 2)
+        k8s_cluster = self.getValidKubernetesCluster(1, 3)
         self.debug("HA Kubernetes cluster with ID: %s successfully deployed" % k8s_cluster.id)
         return
     @attr(tags=["advanced", "smoke"], required_hardware="true")
     @skipTestIf("hypervisorNotSupported")
     def test_08_upgrade_kubernetes_ha_cluster(self):
-        """Test to upgrade a Kubernetes cluster to newer version
+        """Test to upgrade a HA Kubernetes cluster to newer version
         # Validate the following:
         # 1. upgradeKubernetesCluster should return valid info for the cluster
@@ -554,7 +554,7 @@ class TestKubernetesCluster(cloudstackTestCase):
         if self.default_network:
             self.skipTest("HA cluster on shared network requires external ip address, skipping it")
         global k8s_cluster
-        k8s_cluster = self.getValidKubernetesCluster(1, 2, version=self.kubernetes_version_v1)
+        k8s_cluster = self.getValidKubernetesCluster(1, 3, version=self.kubernetes_version_v1)
         time.sleep(self.services["sleep"])
         self.debug("Upgrading HA Kubernetes cluster with ID: %s" % k8s_cluster.id)
@@ -581,7 +581,7 @@ class TestKubernetesCluster(cloudstackTestCase):
         if self.default_network:
             self.skipTest("HA cluster on shared network requires external ip address, skipping it")
         global k8s_cluster
-        k8s_cluster = self.getValidKubernetesCluster(1, 2)
+        k8s_cluster = self.getValidKubernetesCluster(1, 3)
         self.debug("Deleting Kubernetes cluster with ID: %s" % k8s_cluster.id)
         return
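
For context (not part of this diff), a minimal sketch of how a test could confirm the new control-node count through the CloudStack API; the marvin module path and the controlnodes response field used below are assumptions based on the existing CKS listKubernetesClusters API, while getValidKubernetesCluster remains the helper these tests actually use:

# Illustrative only: check that a deployed HA cluster reports 3 control nodes.
# Assumes marvin's auto-generated listKubernetesClusters command module and a
# 'controlnodes' field on the response, neither of which is shown in this diff.
from marvin.cloudstackAPI import listKubernetesClusters

def assert_control_node_count(apiclient, cluster_id, expected=3):
    cmd = listKubernetesClusters.listKubernetesClustersCmd()
    cmd.id = cluster_id
    clusters = apiclient.listKubernetesClusters(cmd)
    assert clusters, "cluster %s not found" % cluster_id
    assert int(clusters[0].controlnodes) == expected, \
        "expected %d control nodes, got %s" % (expected, clusters[0].controlnodes)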


@@ -278,7 +278,7 @@ export default {
     initForm () {
       this.formRef = ref()
       this.form = reactive({
-        controlnodes: 2,
+        controlnodes: 3,
         size: 1,
         noderootdisksize: 8
       })