Merge remote-tracking branch 'origin/4.19'

Rohit Yadav 2024-02-07 14:21:05 +05:30
commit 2bfa9de282
4 changed files with 59 additions and 57 deletions


@@ -1956,6 +1956,7 @@ public class KubernetesClusterManagerImpl extends ManagerBase implements Kuberne
KubernetesClusterStartTimeout,
KubernetesClusterScaleTimeout,
KubernetesClusterUpgradeTimeout,
KubernetesClusterUpgradeRetries,
KubernetesClusterExperimentalFeaturesEnabled,
KubernetesMaxClusterSize
};


@@ -72,6 +72,12 @@ public interface KubernetesClusterService extends PluggableService, Configurable
"Timeout interval (in seconds) in which upgrade operation for a Kubernetes cluster should be completed. Not strictly obeyed while upgrade is in progress on a node",
true,
KubernetesServiceEnabled.key());
static final ConfigKey<Integer> KubernetesClusterUpgradeRetries = new ConfigKey<Integer>("Advanced", Integer.class,
"cloud.kubernetes.cluster.upgrade.retries",
"3",
"The number of retries if fail to upgrade kubernetes cluster due to some reasons (e.g. drain node, etcdserver leader changed)",
true,
KubernetesServiceEnabled.key());
static final ConfigKey<Boolean> KubernetesClusterExperimentalFeaturesEnabled = new ConfigKey<Boolean>("Advanced", Boolean.class,
"cloud.kubernetes.cluster.experimental.features.enabled",
"false",

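A note on the new setting (my reading of the ConfigKey convention, not stated in the patch): the boolean true marks cloud.kubernetes.cluster.upgrade.retries as a dynamic key and KubernetesServiceEnabled.key() as its parent, so the default of 3 is grouped under the Kubernetes service settings and can be changed at runtime without a management-server restart. The upgrade worker in the next hunk re-reads it on every run through KubernetesClusterUpgradeRetries.value(). A minimal, hypothetical read-site sketch:

    // Hypothetical sketch only: how the new key is typically consumed.
    // value() returns the current global setting, "3" unless an operator overrides it.
    int upgradeRetries = KubernetesClusterService.KubernetesClusterUpgradeRetries.value();
    // The retry loops count down from upgradeRetries to 0, so the total number of
    // attempts per step is upgradeRetries + 1 (the patch itself does not clamp negatives).
    int attemptsPerStep = Math.max(0, upgradeRetries) + 1;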

@@ -77,39 +77,62 @@ public class KubernetesClusterUpgradeWorker extends KubernetesClusterActionWorke
}
private void upgradeKubernetesClusterNodes() {
Pair<Boolean, String> result = null;
for (int i = 0; i < clusterVMs.size(); ++i) {
UserVm vm = clusterVMs.get(i);
String hostName = vm.getHostName();
if (StringUtils.isNotEmpty(hostName)) {
hostName = hostName.toLowerCase();
}
result = null;
Pair<Boolean, String> result;
if (LOGGER.isInfoEnabled()) {
LOGGER.info(String.format("Upgrading node on VM %s in Kubernetes cluster %s with Kubernetes version(%s) ID: %s",
vm.getDisplayName(), kubernetesCluster.getName(), upgradeVersion.getSemanticVersion(), upgradeVersion.getUuid()));
}
try {
result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
10000, 10000, 60000);
} catch (Exception e) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
if (!result.first()) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
String errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
try {
result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
10000, 10000, 60000);
if (result.first()) {
break;
}
if (retry > 0) {
LOGGER.error(String.format("%s, retries left: %s", errorMessage, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
} catch (Exception e) {
if (retry > 0) {
LOGGER.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
}
}
if (System.currentTimeMillis() > upgradeTimeoutTime) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
try {
deployProvider();
result = runInstallScriptOnVM(vm, i);
} catch (Exception e) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
if (!result.first()) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
try {
deployProvider();
result = runInstallScriptOnVM(vm, i);
if (result.first()) {
break;
}
if (retry > 0) {
LOGGER.error(String.format("%s, retries left: %s", errorMessage, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
}
} catch (Exception e) {
if (retry > 0) {
LOGGER.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
} else {
logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
}
}
}
if (System.currentTimeMillis() > upgradeTimeoutTime) {
logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);

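For review context, a standalone sketch of the retry semantics shared by the two loops above; the class RetrySketch, the helper runWithRetries and the step callable are hypothetical, not part of the patch. retry counts down from the configured value to zero, so the default of 3 allows up to 4 attempts per step; intermediate failures are only logged, and only the final failure escalates (in the patch via logTransitStateDetachIsoAndThrow). One assumption worth flagging: if logTransitStateDetachIsoAndThrow throws, as its name suggests, the non-exception failure branch at retry == 0 is caught by the surrounding catch and escalates a second time.

    import java.util.concurrent.Callable;

    // Standalone sketch, not CloudStack code: mirrors the shape of the retry loops above.
    public class RetrySketch {

        // A configured value of N allows N retries after the first attempt, i.e. N + 1 attempts in total.
        static void runWithRetries(int retries, Callable<Boolean> step) throws Exception {
            for (int retry = retries; retry >= 0; retry--) {
                boolean ok = false;
                Exception failure = null;
                try {
                    ok = step.call();
                } catch (Exception e) {
                    failure = e;
                }
                if (ok) {
                    return;                     // success: stop retrying, as the loops above do via break
                }
                if (retry > 0) {
                    System.out.printf("attempt failed%s, retries left: %d%n",
                            failure != null ? " due to " + failure : "", retry);
                } else {
                    // this is the point where the patch calls logTransitStateDetachIsoAndThrow()
                    throw (failure != null ? failure : new Exception("step failed after all retries"));
                }
            }
        }

        public static void main(String[] args) throws Exception {
            final int[] calls = {0};
            // With the new default of 3 retries, the step may run up to 4 times.
            runWithRetries(3, () -> ++calls[0] >= 4);           // succeeds on the 4th attempt
            System.out.println("attempts made: " + calls[0]);   // prints: attempts made: 4
        }
    }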

@@ -283,13 +283,15 @@ class TestKubernetesCluster(cloudstackTestCase):
cls.apiclient.deleteKubernetesSupportedVersion(deleteKubernetesSupportedVersionCmd)
@classmethod
def listKubernetesCluster(cls, cluster_id = None):
def listKubernetesCluster(cls, cluster_id = None, cluster_name = None):
listKubernetesClustersCmd = listKubernetesClusters.listKubernetesClustersCmd()
listKubernetesClustersCmd.listall = True
if cluster_id != None:
listKubernetesClustersCmd.id = cluster_id
if cluster_name != None:
listKubernetesClustersCmd.name = cluster_name
clusterResponse = cls.apiclient.listKubernetesClusters(listKubernetesClustersCmd)
if cluster_id != None and clusterResponse != None:
if (cluster_id != None or cluster_name != None) and clusterResponse != None:
return clusterResponse[0]
return clusterResponse
@@ -528,24 +530,6 @@ class TestKubernetesCluster(cloudstackTestCase):
return
@attr(tags=["advanced", "smoke"], required_hardware="true")
@skipTestIf("hypervisorNotSupported")
def test_07_deploy_kubernetes_ha_cluster(self):
"""Test to deploy a new HA Kubernetes cluster
# Validate the following:
# 1. createKubernetesCluster should return valid info for new cluster
# 2. The Cloud Database contains the valid information
"""
if self.setup_failed == True:
self.fail("Setup incomplete")
if self.default_network:
self.skipTest("HA cluster on shared network requires external ip address, skipping it")
global k8s_cluster
k8s_cluster = self.getValidKubernetesCluster(1, 3)
self.debug("HA Kubernetes cluster with ID: %s successfully deployed" % k8s_cluster.id)
return
@attr(tags=["advanced", "smoke"], required_hardware="true")
@skipTestIf("hypervisorNotSupported")
def test_08_upgrade_kubernetes_ha_cluster(self):
@@ -573,24 +557,6 @@ class TestKubernetesCluster(cloudstackTestCase):
self.debug("Kubernetes cluster with ID: %s successfully upgraded" % k8s_cluster.id)
return
@attr(tags=["advanced", "smoke"], required_hardware="true")
@skipTestIf("hypervisorNotSupported")
def test_09_delete_kubernetes_ha_cluster(self):
"""Test to delete a HA Kubernetes cluster
# Validate the following:
# 1. deleteKubernetesCluster should delete an existing HA Kubernetes cluster
"""
if self.setup_failed == True:
self.fail("Setup incomplete")
if self.default_network:
self.skipTest("HA cluster on shared network requires external ip address, skipping it")
global k8s_cluster
k8s_cluster = self.getValidKubernetesCluster(1, 3)
self.debug("Deleting Kubernetes cluster with ID: %s" % k8s_cluster.id)
return
@attr(tags=["advanced", "smoke"], required_hardware="true")
@skipTestIf("hypervisorNotSupported")
def test_10_vpc_tier_kubernetes_cluster(self):
@@ -818,8 +784,14 @@ class TestKubernetesCluster(cloudstackTestCase):
cluster = self.createKubernetesCluster(name, version.id, size, control_nodes)
self.verifyKubernetesCluster(cluster, name, version.id, size, control_nodes)
except Exception as ex:
cluster = self.listKubernetesCluster(cluster_name = name)
if cluster != None:
self.deleteKubernetesClusterAndVerify(cluster.id, False, True)
self.fail("Kubernetes cluster deployment failed: %s" % ex)
except AssertionError as err:
cluster = self.listKubernetesCluster(cluster_name = name)
if cluster != None:
self.deleteKubernetesClusterAndVerify(cluster.id, False, True)
self.fail("Kubernetes cluster deployment failed during cluster verification: %s" % err)
return cluster