Mirror of https://github.com/apache/cloudstack.git
forceha: fix two issues: (1) stop VM from inside, (2) force remove host (#4647)
* forceha: fix VM not being started again if it is powered off from inside

Steps to reproduce the issue:
(1) Make sure force.ha is true in global settings; if not, change it to true and restart the management server.
(2) Create a service offering with HA not enabled.
(3) Create a VM from that offering.
(4) Log into the VM and power it off via the CLI.

Expected result: the VM is started again by CloudStack.
Actual result: the VM is not started.
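For step (1), the global setting can be checked and flipped from cloudmonkey (same kind of session as the one shown below; these are the standard listConfigurations/updateConfiguration APIs):

```
(localcloud) 🐱 > list configurations name=force.ha
(localcloud) 🐱 > update configuration name=force.ha value=true
```

Note that with the "Make ForceHA dynamic" change in this PR, the update takes effect without restarting the management server.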
* forceha: fix VMs still running on the host after the host is force-removed

A host can be force-removed, and its VMs are marked stopped in CloudStack, but they are not actually stopped on the host:
```
(localcloud) 🐱 > delete host id="a5625393-444d-4d0a-b31d-62baf88a8be1" forced=true
{
"success": true
}
```
After some minutes, the VMs are still running on the host:
```
root@mgt01:~# ssh node63 virsh list
Id Name State
---------------------------
1 i-2-19-VM running
2 i-2-11-VM running
```
The error messages are:
```
Cannot transmit host 2 to Enabled state
com.cloud.utils.fsm.NoTransitionException: No next resource state found for current state = Enabled event = DeleteHost
at com.cloud.resource.ResourceManagerImpl.resourceStateTransitTo(ResourceManagerImpl.java:1216)
at com.cloud.resource.ResourceManagerImpl$1.doInTransactionWithoutResult(ResourceManagerImpl.java:907)
```
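The exception comes from the host resource-state FSM having no (Enabled, DeleteHost) entry, which the first hunk below fixes by adding that transition. A minimal, self-contained sketch of the transition-table idea (hypothetical types for illustration, not CloudStack's actual com.cloud.utils.fsm.StateMachine):

```java
import java.util.HashMap;
import java.util.Map;

public class FsmSketch {
    enum State { Enabled, Disabled, Maintenance }
    enum Event { Disable, DeleteHost, InternalEnterMaintenance }

    // transitions.get(current).get(event) -> next state, as in s_fsm.addTransition(...)
    private static final Map<State, Map<Event, State>> transitions = new HashMap<>();

    static void addTransition(State from, Event on, State to) {
        transitions.computeIfAbsent(from, s -> new HashMap<>()).put(on, to);
    }

    static State getNextState(State current, Event event) {
        State next = transitions.getOrDefault(current, Map.of()).get(event);
        if (next == null) {
            // The missing-entry case behind the NoTransitionException above.
            throw new IllegalStateException("No next resource state found for current state = "
                    + current + " event = " + event);
        }
        return next;
    }

    public static void main(String[] args) {
        addTransition(State.Enabled, Event.Disable, State.Disabled);
        // Without the following line, DeleteHost on an Enabled host fails as in the
        // log above; the commit adds exactly this transition.
        addTransition(State.Enabled, Event.DeleteHost, State.Disabled);
        System.out.println(getNextState(State.Enabled, Event.DeleteHost)); // Disabled
    }
}
```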
* forceha: Make ForceHA dynamic
commit e2183ed666 (parent 6b1c94ea3e)
ResourceState.java
```diff
@@ -114,6 +114,7 @@ public enum ResourceState {
         s_fsm.addTransition(ResourceState.Enabled, Event.Disable, ResourceState.Disabled);
         s_fsm.addTransition(ResourceState.Enabled, Event.AdminAskMaintenance, ResourceState.PrepareForMaintenance);
         s_fsm.addTransition(ResourceState.Enabled, Event.InternalEnterMaintenance, ResourceState.Maintenance);
+        s_fsm.addTransition(ResourceState.Enabled, Event.DeleteHost, ResourceState.Disabled);
         s_fsm.addTransition(ResourceState.Disabled, Event.Enable, ResourceState.Enabled);
         s_fsm.addTransition(ResourceState.Disabled, Event.Disable, ResourceState.Disabled);
         s_fsm.addTransition(ResourceState.Disabled, Event.InternalCreated, ResourceState.Disabled);
```
HighAvailabilityManager.java
```diff
@@ -32,7 +32,7 @@ import java.util.List;
  */
 public interface HighAvailabilityManager extends Manager {
 
-    ConfigKey<Boolean> ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false",
+    public ConfigKey<Boolean> ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false",
             "Force High-Availability to happen even if the VM says no.", true, Cluster);
 
     ConfigKey<Integer> HAWorkers = new ConfigKey<>("Advanced", Integer.class, "ha.workers", "5",
```
VirtualMachineManagerImpl.java
```diff
@@ -4498,7 +4498,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac
                     String.format("VM %s is at %s and we received a %s report while there is no pending jobs on it"
                             , vm.getInstanceName(), vm.getState(), vm.getPowerState()));
         }
-        if(vm.isHaEnabled() && vm.getState() == State.Running
+        if((HighAvailabilityManager.ForceHA.value() || vm.isHaEnabled()) && vm.getState() == State.Running
                 && HaVmRestartHostUp.value()
                 && vm.getHypervisorType() != HypervisorType.VMware
                 && vm.getHypervisorType() != HypervisorType.Hyperv) {
```
HighAvailabilityManagerImpl.java
```diff
@@ -197,7 +197,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
     int _maxRetries;
     long _timeBetweenFailures;
     long _timeBetweenCleanups;
-    boolean _forceHA;
     String _haTag = null;
 
     protected HighAvailabilityManagerImpl() {
```
```diff
@@ -364,7 +363,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
             alertType = AlertManager.AlertType.ALERT_TYPE_SSVM;
         }
 
-        if (!(_forceHA || vm.isHaEnabled())) {
+        if (!(ForceHA.value() || vm.isHaEnabled())) {
             String hostDesc = "id:" + vm.getHostId() + ", availability zone id:" + vm.getDataCenterId() + ", pod id:" + vm.getPodIdToDeployIn();
             _alertMgr.sendAlert(alertType, vm.getDataCenterId(), vm.getPodIdToDeployIn(), "VM (name: " + vm.getHostName() + ", id: " + vm.getId() +
                 ") stopped unexpectedly on host " + hostDesc, "Virtual Machine " + vm.getHostName() + " (id: " + vm.getId() + ") running on host [" + vm.getHostId() +
```
```diff
@@ -569,7 +568,7 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
 
         vm = _itMgr.findById(vm.getId());
 
-        if (!_forceHA && !vm.isHaEnabled()) {
+        if (!ForceHA.value() && !vm.isHaEnabled()) {
             if (s_logger.isDebugEnabled()) {
                 s_logger.debug("VM is not HA enabled so we're done.");
             }
```
```diff
@@ -861,7 +860,6 @@ public class HighAvailabilityManagerImpl extends ManagerBase implements Configur
             _workers[i] = new WorkerThread("HA-Worker-" + i);
         }
 
-        _forceHA = ForceHA.value();
         _timeToSleep = TimeToSleep.value() * SECONDS_TO_MILLISECONDS_FACTOR;
         _maxRetries = MigrationMaxRetries.value();
         _timeBetweenFailures = TimeBetweenFailures.value() * SECONDS_TO_MILLISECONDS_FACTOR;
```
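The "Make ForceHA dynamic" part is this removal: instead of caching ForceHA.value() in a _forceHA field at configure() time, every caller now reads ForceHA.value() directly, so an updateConfiguration call takes effect immediately. A minimal sketch of the difference, using a hypothetical BoolKey stand-in for CloudStack's ConfigKey:

```java
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

public class DynamicConfigSketch {
    // Backing store that an updateConfiguration API call would mutate.
    private static final ConcurrentMap<String, String> store = new ConcurrentHashMap<>();

    // Hypothetical stand-in for ConfigKey<Boolean>: re-reads the store on every call.
    record BoolKey(String name, boolean defaultValue) {
        boolean value() {
            String raw = store.get(name);
            return raw == null ? defaultValue : Boolean.parseBoolean(raw);
        }
    }

    static final BoolKey ForceHA = new BoolKey("force.ha", false);

    public static void main(String[] args) {
        boolean cached = ForceHA.value();    // what the removed _forceHA field did
        store.put("force.ha", "true");       // admin updates the setting at runtime
        System.out.println(cached);          // false: stale until a restart
        System.out.println(ForceHA.value()); // true: picked up immediately
    }
}
```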
ResourceManagerImpl.java
```diff
@@ -2325,34 +2325,32 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
             s_logger.debug("Deleting Host: " + host.getId() + " Guid:" + host.getGuid());
         }
 
-        if (forceDestroyStorage) {
+        final StoragePoolVO storagePool = _storageMgr.findLocalStorageOnHost(host.getId());
+        if (forceDestroyStorage && storagePool != null) {
             // put local storage into mainenance mode, will set all the VMs on
             // this local storage into stopped state
-            final StoragePoolVO storagePool = _storageMgr.findLocalStorageOnHost(host.getId());
-            if (storagePool != null) {
-                if (storagePool.getStatus() == StoragePoolStatus.Up || storagePool.getStatus() == StoragePoolStatus.ErrorInMaintenance) {
-                    try {
-                        final StoragePool pool = _storageSvr.preparePrimaryStorageForMaintenance(storagePool.getId());
-                        if (pool == null) {
-                            s_logger.debug("Failed to set primary storage into maintenance mode");
+            if (storagePool.getStatus() == StoragePoolStatus.Up || storagePool.getStatus() == StoragePoolStatus.ErrorInMaintenance) {
+                try {
+                    final StoragePool pool = _storageSvr.preparePrimaryStorageForMaintenance(storagePool.getId());
+                    if (pool == null) {
+                        s_logger.debug("Failed to set primary storage into maintenance mode");
 
-                            throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode");
-                        }
-                    } catch (final Exception e) {
-                        s_logger.debug("Failed to set primary storage into maintenance mode, due to: " + e.toString());
-                        throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode, due to: " + e.toString());
+                        throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode");
                     }
+                } catch (final Exception e) {
+                    s_logger.debug("Failed to set primary storage into maintenance mode, due to: " + e.toString());
+                    throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode, due to: " + e.toString());
                 }
+            }
 
-                final List<VMInstanceVO> vmsOnLocalStorage = _storageMgr.listByStoragePool(storagePool.getId());
-                for (final VMInstanceVO vm : vmsOnLocalStorage) {
-                    try {
-                        _vmMgr.destroy(vm.getUuid(), false);
-                    } catch (final Exception e) {
-                        final String errorMsg = "There was an error Destory the vm: " + vm + " as a part of hostDelete id=" + host.getId();
-                        s_logger.debug(errorMsg, e);
-                        throw new UnableDeleteHostException(errorMsg + "," + e.getMessage());
-                    }
+            final List<VMInstanceVO> vmsOnLocalStorage = _storageMgr.listByStoragePool(storagePool.getId());
+            for (final VMInstanceVO vm : vmsOnLocalStorage) {
+                try {
+                    _vmMgr.destroy(vm.getUuid(), false);
+                } catch (final Exception e) {
+                    final String errorMsg = "There was an error Destory the vm: " + vm + " as a part of hostDelete id=" + host.getId();
+                    s_logger.debug(errorMsg, e);
+                    throw new UnableDeleteHostException(errorMsg + "," + e.getMessage());
                 }
             }
         } else {
```
```diff
@@ -2362,17 +2360,22 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
         if (isForced) {
             // Stop HA disabled vms and HA enabled vms in Stopping state
             // Restart HA enabled vms
+            try {
+                resourceStateTransitTo(host, ResourceState.Event.DeleteHost, host.getId());
+            } catch (final NoTransitionException e) {
+                s_logger.debug("Cannot transmit host " + host.getId() + " to Disabled state", e);
+            }
             for (final VMInstanceVO vm : vms) {
-                if (!vm.isHaEnabled() || vm.getState() == State.Stopping) {
+                if ((! HighAvailabilityManager.ForceHA.value() && !vm.isHaEnabled()) || vm.getState() == State.Stopping) {
                     s_logger.debug("Stopping vm: " + vm + " as a part of deleteHost id=" + host.getId());
                     try {
                         _vmMgr.advanceStop(vm.getUuid(), false);
                         _haMgr.scheduleStop(vm, host.getId(), WorkType.Stop);
                     } catch (final Exception e) {
                         final String errorMsg = "There was an error stopping the vm: " + vm + " as a part of hostDelete id=" + host.getId();
                         s_logger.debug(errorMsg, e);
                         throw new UnableDeleteHostException(errorMsg + "," + e.getMessage());
                     }
-                } else if (vm.isHaEnabled() && (vm.getState() == State.Running || vm.getState() == State.Starting)) {
+                } else if ((HighAvailabilityManager.ForceHA.value() || vm.isHaEnabled()) && (vm.getState() == State.Running || vm.getState() == State.Starting)) {
                     s_logger.debug("Scheduling restart for vm: " + vm + " " + vm.getState() + " on the host id=" + host.getId());
                     _haMgr.scheduleRestart(vm, false);
                 }
```
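With both fixes applied, the repro above can be re-run end to end (same hypothetical host ID and node name as earlier); the forced delete should now transit the host to Disabled and stop or restart its VMs, so virsh on the node should no longer list them as running:

```
(localcloud) 🐱 > update configuration name=force.ha value=true
(localcloud) 🐱 > delete host id="a5625393-444d-4d0a-b31d-62baf88a8be1" forced=true
root@mgt01:~# ssh node63 virsh list
```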