Merge remote-tracking branch 'origin/4.18' into 4.19
commit 0d36098c76
@@ -1956,6 +1956,7 @@ public class KubernetesClusterManagerImpl extends ManagerBase implements Kuberne
                 KubernetesClusterStartTimeout,
                 KubernetesClusterScaleTimeout,
                 KubernetesClusterUpgradeTimeout,
+                KubernetesClusterUpgradeRetries,
                 KubernetesClusterExperimentalFeaturesEnabled,
                 KubernetesMaxClusterSize
         };
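Adding KubernetesClusterUpgradeRetries to this ConfigKey array is what registers it as a global setting, so it becomes visible through the standard configuration APIs. A minimal Marvin-style sketch for reading it back (the apiclient here is assumed to be an initialized admin API client, as in the test class further down; this snippet is illustrative and not part of the change):

    from marvin.cloudstackAPI import listConfigurations

    # Sketch: read the new global setting back; assumes an initialized admin apiclient
    listConfigurationsCmd = listConfigurations.listConfigurationsCmd()
    listConfigurationsCmd.name = "cloud.kubernetes.cluster.upgrade.retries"
    configs = apiclient.listConfigurations(listConfigurationsCmd)
    if configs:
        print(configs[0].name, configs[0].value)  # expected default: 3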
@@ -72,6 +72,12 @@ public interface KubernetesClusterService extends PluggableService, Configurable
             "Timeout interval (in seconds) in which upgrade operation for a Kubernetes cluster should be completed. Not strictly obeyed while upgrade is in progress on a node",
             true,
             KubernetesServiceEnabled.key());
+    static final ConfigKey<Integer> KubernetesClusterUpgradeRetries = new ConfigKey<Integer>("Advanced", Integer.class,
+            "cloud.kubernetes.cluster.upgrade.retries",
+            "3",
+            "The number of retries if fail to upgrade kubernetes cluster due to some reasons (e.g. drain node, etcdserver leader changed)",
+            true,
+            KubernetesServiceEnabled.key());
     static final ConfigKey<Boolean> KubernetesClusterExperimentalFeaturesEnabled = new ConfigKey<Boolean>("Advanced", Boolean.class,
             "cloud.kubernetes.cluster.experimental.features.enabled",
             "false",
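The new key defaults to "3" retries and, like the other Kubernetes settings here, only matters while KubernetesServiceEnabled is on. A hedged sketch of raising it through the updateConfiguration API, in the same Marvin command style used by the tests below (the apiclient is assumed):

    from marvin.cloudstackAPI import updateConfiguration

    # Sketch: allow more retries per node, e.g. for clusters where draining is slow
    updateConfigurationCmd = updateConfiguration.updateConfigurationCmd()
    updateConfigurationCmd.name = "cloud.kubernetes.cluster.upgrade.retries"
    updateConfigurationCmd.value = "5"
    apiclient.updateConfiguration(updateConfigurationCmd)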
@@ -77,39 +77,62 @@ public class KubernetesClusterUpgradeWorker extends KubernetesClusterActionWorke
     }

     private void upgradeKubernetesClusterNodes() {
+        Pair<Boolean, String> result = null;
         for (int i = 0; i < clusterVMs.size(); ++i) {
             UserVm vm = clusterVMs.get(i);
             String hostName = vm.getHostName();
             if (StringUtils.isNotEmpty(hostName)) {
                 hostName = hostName.toLowerCase();
             }
-            Pair<Boolean, String> result;
+            result = null;
             if (LOGGER.isInfoEnabled()) {
                 LOGGER.info(String.format("Upgrading node on VM %s in Kubernetes cluster %s with Kubernetes version(%s) ID: %s",
                         vm.getDisplayName(), kubernetesCluster.getName(), upgradeVersion.getSemanticVersion(), upgradeVersion.getUuid()));
             }
-            try {
-                result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
-                        String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
-                        10000, 10000, 60000);
-            } catch (Exception e) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
-            }
-            if (!result.first()) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+            String errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to drain Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
+            for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
+                try {
+                    result = SshHelper.sshExecute(publicIpAddress, sshPort, getControlNodeLoginUser(), sshKeyFile, null,
+                            String.format("sudo /opt/bin/kubectl drain %s --ignore-daemonsets --delete-emptydir-data", hostName),
+                            10000, 10000, 60000);
+                    if (result.first()) {
+                        break;
+                    }
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s, retries left: %s", errorMessage, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+                    }
+                } catch (Exception e) {
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
+                    }
+                }
             }
             if (System.currentTimeMillis() > upgradeTimeoutTime) {
                 logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
             }
-            try {
-                deployProvider();
-                result = runInstallScriptOnVM(vm, i);
-            } catch (Exception e) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
-            }
-            if (!result.first()) {
-                logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+            errorMessage = String.format("Failed to upgrade Kubernetes cluster : %s, unable to upgrade Kubernetes node on VM : %s", kubernetesCluster.getName(), vm.getDisplayName());
+            for (int retry = KubernetesClusterService.KubernetesClusterUpgradeRetries.value(); retry >= 0; retry--) {
+                try {
+                    deployProvider();
+                    result = runInstallScriptOnVM(vm, i);
+                    if (result.first()) {
+                        break;
+                    }
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s, retries left: %s", errorMessage, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
+                    }
+                } catch (Exception e) {
+                    if (retry > 0) {
+                        LOGGER.error(String.format("%s due to %s, retries left: %s", errorMessage, e, retry));
+                    } else {
+                        logTransitStateDetachIsoAndThrow(Level.ERROR, errorMessage, kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, e);
+                    }
+                }
             }
             if (System.currentTimeMillis() > upgradeTimeoutTime) {
                 logTransitStateDetachIsoAndThrow(Level.ERROR, String.format("Failed to upgrade Kubernetes cluster : %s, upgrade action timed out", kubernetesCluster.getName()), kubernetesCluster, clusterVMs, KubernetesCluster.Event.OperationFailed, null);
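Both loops above follow the same pattern: a configured value of N retries means up to N + 1 attempts, intermediate failures are only logged together with the remaining retry count, and the final failure is escalated through logTransitStateDetachIsoAndThrow. A minimal sketch of that pattern in Python, with illustrative names only (this is not CloudStack code):

    def run_with_retries(action, retries, error_message, logger):
        """Run action() up to retries + 1 times; action() returns a (success, details) pair."""
        for retry in range(retries, -1, -1):       # retries, retries - 1, ..., 0
            failure = None
            try:
                result = action()
                if result[0]:
                    return result                  # success, stop retrying
            except Exception as e:
                failure = e                        # the attempt itself raised
            if retry > 0:
                suffix = " due to %s" % failure if failure else ""
                logger.error("%s%s, retries left: %s" % (error_message, suffix, retry))
            else:
                raise RuntimeError(error_message)  # out of retries: give up

In the Java worker this shape is inlined twice per node, once around the kubectl drain call and once around deployProvider()/runInstallScriptOnVM(), with the cluster transitioned via OperationFailed and the ISO detached on the last failure.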
@@ -283,13 +283,15 @@ class TestKubernetesCluster(cloudstackTestCase):
         cls.apiclient.deleteKubernetesSupportedVersion(deleteKubernetesSupportedVersionCmd)

     @classmethod
-    def listKubernetesCluster(cls, cluster_id = None):
+    def listKubernetesCluster(cls, cluster_id = None, cluster_name = None):
         listKubernetesClustersCmd = listKubernetesClusters.listKubernetesClustersCmd()
         listKubernetesClustersCmd.listall = True
         if cluster_id != None:
             listKubernetesClustersCmd.id = cluster_id
+        if cluster_name != None:
+            listKubernetesClustersCmd.name = cluster_name
         clusterResponse = cls.apiclient.listKubernetesClusters(listKubernetesClustersCmd)
-        if cluster_id != None and clusterResponse != None:
+        if (cluster_id != None or cluster_name != None) and clusterResponse != None:
             return clusterResponse[0]
         return clusterResponse
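The new cluster_name parameter lets the helpers locate a cluster whose create call failed before an id was returned. A short usage sketch, mirroring the getValidKubernetesCluster change at the bottom of this diff (name is assumed to hold the cluster name passed to createKubernetesCluster):

    # Sketch: clean up a partially created cluster by name after a failed deployment
    cluster = self.listKubernetesCluster(cluster_name = name)
    if cluster != None:
        self.deleteKubernetesClusterAndVerify(cluster.id, False, True)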
@@ -528,24 +530,6 @@ class TestKubernetesCluster(cloudstackTestCase):

         return

-    @attr(tags=["advanced", "smoke"], required_hardware="true")
-    @skipTestIf("hypervisorNotSupported")
-    def test_07_deploy_kubernetes_ha_cluster(self):
-        """Test to deploy a new HA Kubernetes cluster
-
-        # Validate the following:
-        # 1. createKubernetesCluster should return valid info for new cluster
-        # 2. The Cloud Database contains the valid information
-        """
-        if self.setup_failed == True:
-            self.fail("Setup incomplete")
-        if self.default_network:
-            self.skipTest("HA cluster on shared network requires external ip address, skipping it")
-        global k8s_cluster
-        k8s_cluster = self.getValidKubernetesCluster(1, 3)
-        self.debug("HA Kubernetes cluster with ID: %s successfully deployed" % k8s_cluster.id)
-        return
-
     @attr(tags=["advanced", "smoke"], required_hardware="true")
     @skipTestIf("hypervisorNotSupported")
     def test_08_upgrade_kubernetes_ha_cluster(self):
@@ -573,24 +557,6 @@ class TestKubernetesCluster(cloudstackTestCase):
         self.debug("Kubernetes cluster with ID: %s successfully upgraded" % k8s_cluster.id)
         return

-    @attr(tags=["advanced", "smoke"], required_hardware="true")
-    @skipTestIf("hypervisorNotSupported")
-    def test_09_delete_kubernetes_ha_cluster(self):
-        """Test to delete a HA Kubernetes cluster
-
-        # Validate the following:
-        # 1. deleteKubernetesCluster should delete an existing HA Kubernetes cluster
-        """
-        if self.setup_failed == True:
-            self.fail("Setup incomplete")
-        if self.default_network:
-            self.skipTest("HA cluster on shared network requires external ip address, skipping it")
-        global k8s_cluster
-        k8s_cluster = self.getValidKubernetesCluster(1, 3)
-
-        self.debug("Deleting Kubernetes cluster with ID: %s" % k8s_cluster.id)
-        return
-
     @attr(tags=["advanced", "smoke"], required_hardware="true")
     @skipTestIf("hypervisorNotSupported")
     def test_10_vpc_tier_kubernetes_cluster(self):
@@ -818,8 +784,14 @@ class TestKubernetesCluster(cloudstackTestCase):
             cluster = self.createKubernetesCluster(name, version.id, size, control_nodes)
             self.verifyKubernetesCluster(cluster, name, version.id, size, control_nodes)
         except Exception as ex:
+            cluster = self.listKubernetesCluster(cluster_name = name)
+            if cluster != None:
+                self.deleteKubernetesClusterAndVerify(cluster.id, False, True)
             self.fail("Kubernetes cluster deployment failed: %s" % ex)
         except AssertionError as err:
+            cluster = self.listKubernetesCluster(cluster_name = name)
+            if cluster != None:
+                self.deleteKubernetesClusterAndVerify(cluster.id, False, True)
             self.fail("Kubernetes cluster deployment failed during cluster verification: %s" % err)
         return cluster