// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.resource;

import static com.cloud.configuration.ConfigurationManagerImpl.MIGRATE_VM_ACROSS_CLUSTERS;
import static com.cloud.configuration.ConfigurationManagerImpl.SET_HOST_DOWN_TO_MAINTENANCE;

import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

import javax.inject.Inject;
import javax.naming.ConfigurationException;

import org.apache.cloudstack.alert.AlertService;
import org.apache.cloudstack.annotation.AnnotationService;
import org.apache.cloudstack.annotation.dao.AnnotationDao;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.command.admin.cluster.AddClusterCmd;
import org.apache.cloudstack.api.command.admin.cluster.DeleteClusterCmd;
import org.apache.cloudstack.api.command.admin.cluster.UpdateClusterCmd;
import org.apache.cloudstack.api.command.admin.host.AddHostCmd;
import org.apache.cloudstack.api.command.admin.host.AddSecondaryStorageCmd;
import org.apache.cloudstack.api.command.admin.host.CancelHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.CancelMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.DeclareHostAsDegradedCmd;
import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd;
import org.apache.cloudstack.api.command.admin.host.ReconnectHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostCmd;
import org.apache.cloudstack.api.command.admin.host.UpdateHostPasswordCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.lang.ObjectUtils;
import org.springframework.stereotype.Component;

import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.GetGPUStatsAnswer;
import com.cloud.agent.api.GetGPUStatsCommand;
import com.cloud.agent.api.GetHostStatsAnswer;
import com.cloud.agent.api.GetHostStatsCommand;
import com.cloud.agent.api.GetVncPortAnswer;
import com.cloud.agent.api.GetVncPortCommand;
import com.cloud.agent.api.MaintainAnswer;
import com.cloud.agent.api.MaintainCommand;
import
com.cloud.agent.api.PropagateResourceEventCommand; import com.cloud.agent.api.StartupCommand; import com.cloud.agent.api.StartupRoutingCommand; import com.cloud.agent.api.UnsupportedAnswer; import com.cloud.agent.api.UpdateHostPasswordCommand; import com.cloud.agent.api.VgpuTypesInfo; import com.cloud.agent.api.to.GPUDeviceTO; import com.cloud.agent.transport.Request; import com.cloud.alert.AlertManager; import com.cloud.capacity.Capacity; import com.cloud.capacity.CapacityManager; import com.cloud.capacity.CapacityState; import com.cloud.capacity.CapacityVO; import com.cloud.capacity.dao.CapacityDao; import com.cloud.cluster.ClusterManager; import com.cloud.configuration.Config; import com.cloud.cpu.CPU; import com.cloud.dc.ClusterDetailsDao; import com.cloud.dc.ClusterDetailsVO; import com.cloud.dc.ClusterVO; import com.cloud.dc.DataCenter; import com.cloud.dc.DataCenter.NetworkType; import com.cloud.dc.DataCenterIpAddressVO; import com.cloud.dc.DataCenterVO; import com.cloud.dc.DedicatedResourceVO; import com.cloud.dc.HostPodVO; import com.cloud.dc.PodCluster; import com.cloud.dc.dao.ClusterDao; import com.cloud.dc.dao.ClusterVSMMapDao; import com.cloud.dc.dao.DataCenterDao; import com.cloud.dc.dao.DataCenterIpAddressDao; import com.cloud.dc.dao.DedicatedResourceDao; import com.cloud.dc.dao.HostPodDao; import com.cloud.deploy.DataCenterDeployment; import com.cloud.deploy.DeployDestination; import com.cloud.deploy.DeploymentPlanner; import com.cloud.deploy.DeploymentPlanningManager; import com.cloud.deploy.PlannerHostReservationVO; import com.cloud.deploy.dao.PlannerHostReservationDao; import com.cloud.event.ActionEvent; import com.cloud.event.ActionEventUtils; import com.cloud.event.EventTypes; import com.cloud.event.EventVO; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.DiscoveryException; import com.cloud.exception.InsufficientServerCapacityException; import com.cloud.exception.InvalidParameterValueException; import com.cloud.exception.PermissionDeniedException; import com.cloud.exception.ResourceUnavailableException; import com.cloud.exception.StorageConflictException; import com.cloud.exception.StorageUnavailableException; import com.cloud.gpu.GPU; import com.cloud.gpu.HostGpuGroupsVO; import com.cloud.gpu.VGPUTypesVO; import com.cloud.gpu.dao.HostGpuGroupsDao; import com.cloud.gpu.dao.VGPUTypesDao; import com.cloud.ha.HighAvailabilityManager; import com.cloud.ha.HighAvailabilityManager.WorkType; import com.cloud.ha.HighAvailabilityManagerImpl; import com.cloud.host.DetailVO; import com.cloud.host.Host; import com.cloud.host.Host.Type; import com.cloud.host.HostStats; import com.cloud.host.HostTagVO; import com.cloud.host.HostVO; import com.cloud.host.Status; import com.cloud.host.Status.Event; import com.cloud.host.dao.HostDao; import com.cloud.host.dao.HostDetailsDao; import com.cloud.host.dao.HostTagsDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.hypervisor.HypervisorGuru; import com.cloud.hypervisor.kvm.discoverer.KvmDummyResourceBase; import com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; import com.cloud.org.Cluster; import com.cloud.org.Grouping; import com.cloud.org.Managed; import com.cloud.serializer.GsonHelper; import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; import com.cloud.service.dao.ServiceOfferingDetailsDao; import com.cloud.storage.GuestOSCategoryVO; import 
com.cloud.storage.StorageManager; import com.cloud.storage.StoragePool; import com.cloud.storage.StoragePoolHostVO; import com.cloud.storage.StoragePoolStatus; import com.cloud.storage.StorageService; import com.cloud.storage.VMTemplateVO; import com.cloud.storage.Volume; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.GuestOSCategoryDao; import com.cloud.storage.dao.StoragePoolHostDao; import com.cloud.storage.dao.VMTemplateDao; import com.cloud.storage.dao.VolumeDao; import com.cloud.user.Account; import com.cloud.user.AccountManager; import com.cloud.utils.StringUtils; import com.cloud.utils.Ternary; import com.cloud.utils.UriUtils; import com.cloud.utils.component.Manager; import com.cloud.utils.component.ManagerBase; import com.cloud.utils.db.DB; import com.cloud.utils.db.Filter; import com.cloud.utils.db.GenericSearchBuilder; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.JoinBuilder; import com.cloud.utils.db.QueryBuilder; import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.SearchCriteria.Func; import com.cloud.utils.db.SearchCriteria.Op; import com.cloud.utils.db.Transaction; import com.cloud.utils.db.TransactionCallback; import com.cloud.utils.db.TransactionCallbackNoReturn; import com.cloud.utils.db.TransactionLegacy; import com.cloud.utils.db.TransactionStatus; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.utils.fsm.NoTransitionException; import com.cloud.utils.net.Ip; import com.cloud.utils.net.NetUtils; import com.cloud.utils.ssh.SSHCmdHelper; import com.cloud.utils.ssh.SshException; import com.cloud.vm.UserVmManager; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineManager; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.VirtualMachineProfileImpl; import com.cloud.vm.VmDetailConstants; import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.VMInstanceDao; import com.google.gson.Gson; @Component public class ResourceManagerImpl extends ManagerBase implements ResourceManager, ResourceService, Manager { Gson _gson; @Inject private AccountManager _accountMgr; @Inject private AgentManager _agentMgr; @Inject private StorageManager _storageMgr; @Inject private DataCenterDao _dcDao; @Inject private HostPodDao _podDao; @Inject private ClusterDetailsDao _clusterDetailsDao; @Inject private ClusterDao _clusterDao; @Inject private CapacityDao _capacityDao; @Inject private ServiceOfferingDao serviceOfferingDao; @Inject private HostDao _hostDao; @Inject private HostDetailsDao _hostDetailsDao; @Inject private ConfigurationDao _configDao; @Inject private HostTagsDao _hostTagsDao; @Inject private GuestOSCategoryDao _guestOSCategoryDao; @Inject protected HostGpuGroupsDao _hostGpuGroupsDao; @Inject protected VGPUTypesDao _vgpuTypesDao; @Inject private PrimaryDataStoreDao _storagePoolDao; @Inject private DataCenterIpAddressDao _privateIPAddressDao; @Inject private IPAddressDao _publicIPAddressDao; @Inject private DeploymentPlanningManager deploymentManager; @Inject private VirtualMachineManager _vmMgr; @Inject private VMInstanceDao _vmDao; @Inject private HighAvailabilityManager _haMgr; @Inject private StorageService _storageSvr; @Inject PlannerHostReservationDao _plannerHostReserveDao; @Inject private DedicatedResourceDao _dedicatedDao; @Inject private ServiceOfferingDetailsDao _serviceOfferingDetailsDao; @Inject private UserVmManager userVmManager; private 
List<? extends Discoverer> _discoverers;

    public List<? extends Discoverer> getDiscoverers() {
        return _discoverers;
    }

    public void setDiscoverers(final List<? extends Discoverer> discoverers) {
        _discoverers = discoverers;
    }

    @Inject
    private ClusterManager _clusterMgr;
    @Inject
    private StoragePoolHostDao _storagePoolHostDao;
    @Inject
    private VMTemplateDao _templateDao;
    @Inject
    private ClusterVSMMapDao _clusterVSMMapDao;
    @Inject
    private UserVmDetailsDao userVmDetailsDao;
    @Inject
    private AnnotationDao annotationDao;
    @Inject
    private AlertManager alertManager;
    @Inject
    private AnnotationService annotationService;
    @Inject
    private VolumeDao volumeDao;

    private final long _nodeId = ManagementServerNode.getManagementServerId();

    private final HashMap<String, ResourceStateAdapter> _resourceStateAdapters = new HashMap<>();

    private final HashMap<Integer, List<ResourceListener>> _lifeCycleListeners = new HashMap<>();

    private HypervisorType _defaultSystemVMHypervisor;

    private static final int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION = 30; // seconds

    private GenericSearchBuilder<HostVO, String> _hypervisorsInDC;
    private SearchBuilder<HostGpuGroupsVO> _gpuAvailability;

    private void insertListener(final Integer event, final ResourceListener listener) {
        List<ResourceListener> lst = _lifeCycleListeners.computeIfAbsent(event, k -> new ArrayList<>());
        if (lst.contains(listener)) {
            throw new CloudRuntimeException("Duplicate resource listener:" + listener.getClass().getSimpleName());
        }
        lst.add(listener);
    }

    @Override
    public void registerResourceEvent(final Integer event, final ResourceListener listener) {
        synchronized (_lifeCycleListeners) {
            if ((event & ResourceListener.EVENT_DISCOVER_BEFORE) != 0) {
                insertListener(ResourceListener.EVENT_DISCOVER_BEFORE, listener);
            }
            if ((event & ResourceListener.EVENT_DISCOVER_AFTER) != 0) {
                insertListener(ResourceListener.EVENT_DISCOVER_AFTER, listener);
            }
            if ((event & ResourceListener.EVENT_DELETE_HOST_BEFORE) != 0) {
                insertListener(ResourceListener.EVENT_DELETE_HOST_BEFORE, listener);
            }
            if ((event & ResourceListener.EVENT_DELETE_HOST_AFTER) != 0) {
                insertListener(ResourceListener.EVENT_DELETE_HOST_AFTER, listener);
            }
            if ((event & ResourceListener.EVENT_CANCEL_MAINTENANCE_BEFORE) != 0) {
                insertListener(ResourceListener.EVENT_CANCEL_MAINTENANCE_BEFORE, listener);
            }
            if ((event & ResourceListener.EVENT_CANCEL_MAINTENANCE_AFTER) != 0) {
                insertListener(ResourceListener.EVENT_CANCEL_MAINTENANCE_AFTER, listener);
            }
            if ((event & ResourceListener.EVENT_PREPARE_MAINTENANCE_BEFORE) != 0) {
                insertListener(ResourceListener.EVENT_PREPARE_MAINTENANCE_BEFORE, listener);
            }
            if ((event & ResourceListener.EVENT_PREPARE_MAINTENANCE_AFTER) != 0) {
                insertListener(ResourceListener.EVENT_PREPARE_MAINTENANCE_AFTER, listener);
            }
        }
    }

    @Override
    public void unregisterResourceEvent(final ResourceListener listener) {
        synchronized (_lifeCycleListeners) {
            for (Map.Entry<Integer, List<ResourceListener>> items : _lifeCycleListeners.entrySet()) {
                final List<ResourceListener> lst = items.getValue();
                lst.remove(listener);
            }
        }
    }

    protected void processResourceEvent(final Integer event, final Object...
params) { final List lst = _lifeCycleListeners.get(event); if (lst == null || lst.isEmpty()) { return; } String eventName; for (final ResourceListener l : lst) { if (event.equals(ResourceListener.EVENT_DISCOVER_BEFORE)) { l.processDiscoverEventBefore((Long)params[0], (Long)params[1], (Long)params[2], (URI)params[3], (String)params[4], (String)params[5], (List)params[6]); eventName = "EVENT_DISCOVER_BEFORE"; } else if (event.equals(ResourceListener.EVENT_DISCOVER_AFTER)) { l.processDiscoverEventAfter((Map>)params[0]); eventName = "EVENT_DISCOVER_AFTER"; } else if (event.equals(ResourceListener.EVENT_DELETE_HOST_BEFORE)) { l.processDeleteHostEventBefore((HostVO)params[0]); eventName = "EVENT_DELETE_HOST_BEFORE"; } else if (event.equals(ResourceListener.EVENT_DELETE_HOST_AFTER)) { l.processDeletHostEventAfter((HostVO)params[0]); eventName = "EVENT_DELETE_HOST_AFTER"; } else if (event.equals(ResourceListener.EVENT_CANCEL_MAINTENANCE_BEFORE)) { l.processCancelMaintenaceEventBefore((Long)params[0]); eventName = "EVENT_CANCEL_MAINTENANCE_BEFORE"; } else if (event.equals(ResourceListener.EVENT_CANCEL_MAINTENANCE_AFTER)) { l.processCancelMaintenaceEventAfter((Long)params[0]); eventName = "EVENT_CANCEL_MAINTENANCE_AFTER"; } else if (event.equals(ResourceListener.EVENT_PREPARE_MAINTENANCE_BEFORE)) { l.processPrepareMaintenaceEventBefore((Long)params[0]); eventName = "EVENT_PREPARE_MAINTENANCE_BEFORE"; } else if (event.equals(ResourceListener.EVENT_PREPARE_MAINTENANCE_AFTER)) { l.processPrepareMaintenaceEventAfter((Long)params[0]); eventName = "EVENT_PREPARE_MAINTENANCE_AFTER"; } else { throw new CloudRuntimeException("Unknown resource event:" + event); } logger.debug("Sent resource event " + eventName + " to listener " + l.getClass().getSimpleName()); } } @DB @Override public List discoverCluster(final AddClusterCmd cmd) throws IllegalArgumentException, DiscoveryException { final long dcId = cmd.getZoneId(); final long podId = cmd.getPodId(); final String clusterName = cmd.getClusterName(); String url = cmd.getUrl(); final String username = cmd.getUsername(); final String password = cmd.getPassword(); CPU.CPUArch arch = cmd.getArch(); if (url != null) { url = URLDecoder.decode(url, com.cloud.utils.StringUtils.getPreferredCharset()); } URI uri; // Check if the zone exists in the system final DataCenterVO zone = _dcDao.findById(dcId); if (zone == null) { final InvalidParameterValueException ex = new InvalidParameterValueException("Can't find zone by the id specified"); ex.addProxyObject(String.valueOf(dcId), "dcId"); throw ex; } final Account account = CallContext.current().getCallingAccount(); if (Grouping.AllocationState.Disabled == zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getId())) { final PermissionDeniedException ex = new PermissionDeniedException("Cannot perform this operation, Zone with specified id is currently disabled"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } final HostPodVO pod = _podDao.findById(podId); // Check if the pod exists in the system if (_podDao.findById(podId) == null) { throw new InvalidParameterValueException("Can't find pod by id " + podId); } // check if pod belongs to the zone if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { final InvalidParameterValueException ex = new InvalidParameterValueException(String.format("Pod with specified id doesn't belong to the zone %s", zone)); ex.addProxyObject(pod.getUuid(), "podId"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } // Verify cluster information and create a new cluster if 
needed if (clusterName == null || clusterName.isEmpty()) { throw new InvalidParameterValueException("Please specify cluster name"); } if (cmd.getHypervisor() == null || cmd.getHypervisor().isEmpty()) { throw new InvalidParameterValueException("Please specify a hypervisor"); } final Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(cmd.getHypervisor()); if (hypervisorType == null) { logger.error("Unable to resolve " + cmd.getHypervisor() + " to a valid supported hypervisor type"); throw new InvalidParameterValueException("Unable to resolve " + cmd.getHypervisor() + " to a supported "); } if (zone.isSecurityGroupEnabled() && zone.getNetworkType().equals(NetworkType.Advanced)) { if (hypervisorType != HypervisorType.KVM && hypervisorType != HypervisorType.XenServer && hypervisorType != HypervisorType.LXC && hypervisorType != HypervisorType.Simulator) { throw new InvalidParameterValueException("Don't support hypervisor type " + hypervisorType + " in advanced security enabled zone"); } } Cluster.ClusterType clusterType = null; if (cmd.getClusterType() != null && !cmd.getClusterType().isEmpty()) { clusterType = Cluster.ClusterType.valueOf(cmd.getClusterType()); } if (clusterType == null) { clusterType = Cluster.ClusterType.CloudManaged; } Grouping.AllocationState allocationState = null; if (cmd.getAllocationState() != null && !cmd.getAllocationState().isEmpty()) { try { allocationState = Grouping.AllocationState.valueOf(cmd.getAllocationState()); } catch (final IllegalArgumentException ex) { throw new InvalidParameterValueException("Unable to resolve Allocation State '" + cmd.getAllocationState() + "' to a supported state"); } } if (allocationState == null) { allocationState = Grouping.AllocationState.Enabled; } final Discoverer discoverer = getMatchingDiscover(hypervisorType); if (discoverer == null) { throw new InvalidParameterValueException("Could not find corresponding resource manager for " + cmd.getHypervisor()); } if (hypervisorType == HypervisorType.VMware) { final Map allParams = cmd.getFullUrlParams(); discoverer.putParam(allParams); } final List result = new ArrayList<>(); ClusterVO cluster = new ClusterVO(dcId, podId, clusterName); cluster.setHypervisorType(hypervisorType.toString()); cluster.setClusterType(clusterType); cluster.setAllocationState(allocationState); cluster.setArch(arch.getType()); try { cluster = _clusterDao.persist(cluster); } catch (final Exception e) { // no longer tolerate exception during the cluster creation phase final CloudRuntimeException ex = new CloudRuntimeException("Unable to create cluster " + clusterName + " in pod and data center with specified ids", e); // Get the pod VO object's table name. ex.addProxyObject(pod.getUuid(), "podId"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } result.add(cluster); if (clusterType == Cluster.ClusterType.CloudManaged) { final Map details = new HashMap<>(); // should do this nicer perhaps ? 
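// For cloud-managed clusters only the CPU/memory overcommit ratios (plus, for Ovm3,
// the ovm3vip/ovm3pool/ovm3cluster parameters) are persisted as cluster details;
// externally managed clusters additionally persist the url, username and password
// further below so hosts can later be cross-checked against the cluster.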
if (hypervisorType == HypervisorType.Ovm3) { final Map allParams = cmd.getFullUrlParams(); details.put("ovm3vip", allParams.get("ovm3vip")); details.put("ovm3pool", allParams.get("ovm3pool")); details.put("ovm3cluster", allParams.get("ovm3cluster")); } details.put(VmDetailConstants.CPU_OVER_COMMIT_RATIO, CapacityManager.CpuOverprovisioningFactor.value().toString()); details.put(VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, CapacityManager.MemOverprovisioningFactor.value().toString()); _clusterDetailsDao.persist(cluster.getId(), details); return result; } // save cluster details for later cluster/host cross-checking final Map details = new HashMap<>(); details.put("url", url); details.put("username", StringUtils.defaultString(username)); details.put("password", StringUtils.defaultString(password)); details.put(VmDetailConstants.CPU_OVER_COMMIT_RATIO, CapacityManager.CpuOverprovisioningFactor.value().toString()); details.put(VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, CapacityManager.MemOverprovisioningFactor.value().toString()); _clusterDetailsDao.persist(cluster.getId(), details); boolean success = false; try { try { uri = new URI(UriUtils.encodeURIComponent(url)); if (uri.getScheme() == null) { throw new InvalidParameterValueException("uri.scheme is null " + url + ", add http:// as a prefix"); } else if (uri.getScheme().equalsIgnoreCase("http")) { if (uri.getHost() == null || uri.getHost().equalsIgnoreCase("") || uri.getPath() == null || uri.getPath().equalsIgnoreCase("")) { throw new InvalidParameterValueException("Your host and/or path is wrong. Make sure it's of the format http://hostname/path"); } } } catch (final URISyntaxException e) { throw new InvalidParameterValueException(url + " is not a valid uri"); } final List hosts = new ArrayList<>(); Map> resources; resources = discoverer.find(dcId, podId, cluster.getId(), uri, username, password, null); if (resources != null) { for (final Map.Entry> entry : resources.entrySet()) { final ServerResource resource = entry.getKey(); final HostVO host = (HostVO)createHostAndAgent(resource, entry.getValue(), true, null, false); if (host != null) { hosts.add(host); } discoverer.postDiscovery(hosts, _nodeId); } logger.info("External cluster has been successfully discovered by " + discoverer.getName()); success = true; CallContext.current().putContextParameter(Cluster.class, cluster.getUuid()); return result; } logger.warn("Unable to find the server resources at " + url); throw new DiscoveryException("Unable to add the external cluster"); } finally { if (!success) { _clusterDetailsDao.deleteDetails(cluster.getId()); _clusterDao.remove(cluster.getId()); } } } @Override public Discoverer getMatchingDiscover(final Hypervisor.HypervisorType hypervisorType) { for (final Discoverer discoverer : _discoverers) { if (discoverer.getHypervisorType() == hypervisorType) { return discoverer; } } return null; } @Override public List discoverHosts(final AddHostCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { Long dcId = cmd.getZoneId(); final Long podId = cmd.getPodId(); final Long clusterId = cmd.getClusterId(); String clusterName = cmd.getClusterName(); final String url = cmd.getUrl(); final String username = cmd.getUsername(); final String password = cmd.getPassword(); final List hostTags = cmd.getHostTags(); dcId = _accountMgr.checkAccessAndSpecifyAuthority(CallContext.current().getCallingAccount(), dcId); // this is for standalone option if (clusterName == null && clusterId == null) { clusterName = "Standalone-" + 
url; } if (clusterId != null) { final ClusterVO cluster = _clusterDao.findById(clusterId); if (cluster == null) { final InvalidParameterValueException ex = new InvalidParameterValueException("can not find cluster for specified clusterId"); ex.addProxyObject(clusterId.toString(), "clusterId"); throw ex; } else { if (cluster.getGuid() == null) { final List hostIds = _hostDao.listIdsByClusterId(clusterId); if (!hostIds.isEmpty()) { final CloudRuntimeException ex = new CloudRuntimeException("Guid is not updated for cluster with specified cluster id; need to wait for hosts in this cluster to come up"); ex.addProxyObject(cluster.getUuid(), "clusterId"); throw ex; } } } } String hypervisorType = cmd.getHypervisor().equalsIgnoreCase(HypervisorGuru.HypervisorCustomDisplayName.value()) ? "Custom" : cmd.getHypervisor(); return discoverHostsFull(dcId, podId, clusterId, clusterName, url, username, password, hypervisorType, hostTags, cmd.getFullUrlParams(), false); } @Override public List discoverHosts(final AddSecondaryStorageCmd cmd) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { final Long dcId = cmd.getZoneId(); final String url = cmd.getUrl(); return discoverHostsFull(dcId, null, null, null, url, null, null, "SecondaryStorage", null, null, false); } private List discoverHostsFull(final Long dcId, final Long podId, Long clusterId, final String clusterName, String url, String username, String password, final String hypervisorType, final List hostTags, final Map params, final boolean deferAgentCreation) throws IllegalArgumentException, DiscoveryException, InvalidParameterValueException { URI uri; // Check if the zone exists in the system final DataCenterVO zone = _dcDao.findById(dcId); if (zone == null) { throw new InvalidParameterValueException("Can't find zone by id " + dcId); } final Account account = CallContext.current().getCallingAccount(); if (Grouping.AllocationState.Disabled == zone.getAllocationState() && !_accountMgr.isRootAdmin(account.getId())) { final PermissionDeniedException ex = new PermissionDeniedException("Cannot perform this operation, Zone with specified id is currently disabled"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } // Check if the pod exists in the system HostPodVO pod = null; if (podId != null) { pod = _podDao.findById(podId); if (pod == null) { throw new InvalidParameterValueException("Can't find pod by id " + podId); } // check if pod belongs to the zone if (!Long.valueOf(pod.getDataCenterId()).equals(dcId)) { final InvalidParameterValueException ex = new InvalidParameterValueException(String.format("Pod with specified pod %s doesn't belong to the zone with specified zone %s", pod, zone)); ex.addProxyObject(pod.getUuid(), "podId"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } } // Verify cluster information and create a new cluster if needed if (clusterName != null && clusterId != null) { throw new InvalidParameterValueException("Can't specify cluster by both id and name"); } if (hypervisorType == null || hypervisorType.isEmpty()) { throw new InvalidParameterValueException("Need to specify Hypervisor Type"); } if ((clusterName != null || clusterId != null) && podId == null) { throw new InvalidParameterValueException("Can't specify cluster without specifying the pod"); } List skipList = Arrays.asList(HypervisorType.VMware.name().toLowerCase(Locale.ROOT), Type.SecondaryStorage.name().toLowerCase(Locale.ROOT)); if (!skipList.contains(hypervisorType.toLowerCase(Locale.ROOT))) { if 
(HypervisorType.KVM.toString().equalsIgnoreCase(hypervisorType)) { if (StringUtils.isBlank(username)) { throw new InvalidParameterValueException("Username need to be provided."); } } else { if (StringUtils.isBlank(username) || StringUtils.isBlank(password)) { throw new InvalidParameterValueException("Username and Password need to be provided."); } } } ClusterVO cluster = null; if (clusterId != null) { cluster = _clusterDao.findById(clusterId); if (cluster == null) { throw new InvalidParameterValueException("Can't find cluster by id " + clusterId); } if (hypervisorType.equalsIgnoreCase(HypervisorType.VMware.toString())) { // VMware only allows adding host to an existing cluster, as we // already have a lot of information // in cluster object, to simplify user input, we will construct // necessary information here final Map clusterDetails = _clusterDetailsDao.findDetails(clusterId); username = clusterDetails.get("username"); assert username != null; password = clusterDetails.get("password"); assert password != null; try { uri = new URI(UriUtils.encodeURIComponent(url)); url = clusterDetails.get("url") + "/" + uri.getHost(); } catch (final URISyntaxException e) { throw new InvalidParameterValueException(url + " is not a valid uri"); } } } if ((hypervisorType.equalsIgnoreCase(HypervisorType.BareMetal.toString()))) { if (hostTags.isEmpty()) { throw new InvalidParameterValueException("hosttag is mandatory while adding host of type Baremetal"); } } if (clusterName != null) { if (pod == null) { throw new InvalidParameterValueException("Can't find pod by id " + podId); } cluster = new ClusterVO(dcId, podId, clusterName); cluster.setHypervisorType(hypervisorType); try { cluster = _clusterDao.persist(cluster); } catch (final Exception e) { cluster = _clusterDao.findBy(clusterName, podId); if (cluster == null) { final CloudRuntimeException ex = new CloudRuntimeException("Unable to create cluster " + clusterName + " in pod with specified podId and data center with specified dcID", e); ex.addProxyObject(pod.getUuid(), "podId"); ex.addProxyObject(zone.getUuid(), "dcId"); throw ex; } } clusterId = cluster.getId(); if (_clusterDetailsDao.findDetail(clusterId, VmDetailConstants.CPU_OVER_COMMIT_RATIO) == null) { final ClusterDetailsVO cluster_cpu_detail = new ClusterDetailsVO(clusterId, VmDetailConstants.CPU_OVER_COMMIT_RATIO, "1"); final ClusterDetailsVO cluster_memory_detail = new ClusterDetailsVO(clusterId, VmDetailConstants.MEMORY_OVER_COMMIT_RATIO, "1"); _clusterDetailsDao.persist(cluster_cpu_detail); _clusterDetailsDao.persist(cluster_memory_detail); } } try { uri = new URI(UriUtils.encodeURIComponent(url)); if (uri.getScheme() == null) { throw new InvalidParameterValueException("uri.scheme is null " + url + ", add nfs:// (or cifs://) as a prefix"); } else if (uri.getScheme().equalsIgnoreCase("nfs")) { if (uri.getHost() == null || uri.getHost().equalsIgnoreCase("") || uri.getPath() == null || uri.getPath().equalsIgnoreCase("")) { throw new InvalidParameterValueException("Your host and/or path is wrong. Make sure it's of the format nfs://hostname/path"); } } else if (uri.getScheme().equalsIgnoreCase("cifs")) { // Don't validate against a URI encoded URI. 
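// The host URL must carry an explicit scheme; nfs:// URLs additionally need both a
// host and a path, e.g. nfs://nfs-server.example.com/export/primary (the hostname and
// path are illustrative). CIFS URLs are re-parsed below from the raw, un-encoded URL
// so that UriUtils can report problems with their query parameters.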
final URI cifsUri = new URI(url); final String warnMsg = UriUtils.getCifsUriParametersProblems(cifsUri); if (warnMsg != null) { throw new InvalidParameterValueException(warnMsg); } } } catch (final URISyntaxException e) { throw new InvalidParameterValueException(url + " is not a valid uri"); } final List hosts = new ArrayList<>(); logger.info("Trying to add a new host at {} in data center {}", url, zone); boolean isHypervisorTypeSupported = false; for (final Discoverer discoverer : _discoverers) { if (params != null) { discoverer.putParam(params); } if (!discoverer.matchHypervisor(hypervisorType)) { continue; } isHypervisorTypeSupported = true; Map> resources = null; processResourceEvent(ResourceListener.EVENT_DISCOVER_BEFORE, dcId, podId, clusterId, uri, username, password, hostTags); try { resources = discoverer.find(dcId, podId, clusterId, uri, username, password, hostTags); } catch (final DiscoveryException e) { String errorMsg = String.format("Could not add host at [%s] with zone [%s], pod [%s] and cluster [%s] due to: [%s].", uri, zone, pod, cluster, e.getMessage()); if (logger.isDebugEnabled()) { logger.debug(errorMsg, e); } throw new DiscoveryException(errorMsg, e); } catch (final Exception e) { String err = "Exception in host discovery process with discoverer: " + discoverer.getName(); logger.info(err + ", skip to another discoverer if there is any"); if (logger.isDebugEnabled()) { logger.debug(err + ":" + e.getMessage(), e); } } processResourceEvent(ResourceListener.EVENT_DISCOVER_AFTER, resources); if (resources != null) { for (final Map.Entry> entry : resources.entrySet()) { final ServerResource resource = entry.getKey(); /* * For KVM, if we go to here, that means kvm agent is * already connected to mgt svr. */ if (resource instanceof KvmDummyResourceBase) { final Map details = entry.getValue(); final String guid = details.get("guid"); final List kvmHosts = listAllUpAndEnabledHosts(Host.Type.Routing, clusterId, podId, dcId); for (final HostVO host : kvmHosts) { if (host.getGuid().equalsIgnoreCase(guid)) { if (hostTags != null) { if (logger.isTraceEnabled()) { logger.trace("Adding Host Tags for KVM host, tags: :" + hostTags); } _hostTagsDao.persist(host.getId(), hostTags, false); } hosts.add(host); _agentMgr.notifyMonitorsOfNewlyAddedHost(host.getId()); return hosts; } } return null; } HostVO host; if (deferAgentCreation) { host = (HostVO)createHostAndAgentDeferred(resource, entry.getValue(), true, hostTags, false); } else { host = (HostVO)createHostAndAgent(resource, entry.getValue(), true, hostTags, false); } if (host != null) { hosts.add(host); } discoverer.postDiscovery(hosts, _nodeId); } logger.info("server resources successfully discovered by " + discoverer.getName()); return hosts; } } if (!isHypervisorTypeSupported) { final String msg = "Do not support HypervisorType " + hypervisorType + " for " + url; logger.warn(msg); throw new DiscoveryException(msg); } String errorMsg = "Cannot find the server resources at " + url; logger.warn(errorMsg); throw new DiscoveryException("Unable to add the host: " + errorMsg); } @Override public Host getHost(final long hostId) { return _hostDao.findById(hostId); } @DB protected boolean doDeleteHost(final long hostId, final boolean isForced, final boolean isForceDeleteStorage) { _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); // Verify that host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); } 
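// Remainder of doDeleteHost: check the caller's access to the host's zone, refuse to
// delete a host that is not in Maintenance/Degraded state unless deletion is forced,
// dispatch the DELETE_HOST event to the resource state adapters, and then clean up the
// host's DB state (details, tags, GPU entries, capacity rows, storage pool mappings and
// stale VM assignments) inside a single transaction.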
_accountMgr.checkAccessAndSpecifyAuthority(CallContext.current().getCallingAccount(), host.getDataCenterId()); if (!canDeleteHost(host) && !isForced) { throw new CloudRuntimeException("Host " + host.getUuid() + " cannot be deleted as it is not in maintenance mode. Either put the host into maintenance or perform a forced deletion."); } // Get storage pool host mappings here because they can be removed as a // part of handleDisconnect later final List pools = _storagePoolHostDao.listByHostIdIncludingRemoved(hostId); final ResourceStateAdapter.DeleteHostAnswer answer = (ResourceStateAdapter.DeleteHostAnswer)dispatchToStateAdapters(ResourceStateAdapter.Event.DELETE_HOST, false, host, isForced, isForceDeleteStorage); if (answer == null) { throw new CloudRuntimeException(String.format("No resource adapter respond to DELETE_HOST event for %s, hypervisorType is %s, host type is %s", host, host.getHypervisorType(), host.getType())); } if (answer.getIsException()) { return false; } if (!answer.getIsContinue()) { return true; } Long clusterId = host.getClusterId(); _agentMgr.notifyMonitorsOfHostAboutToBeRemoved(host.getId()); Transaction.execute(new TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(final TransactionStatus status) { _dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), host.getDataCenterId(), null); _agentMgr.disconnectWithoutInvestigation(hostId, Status.Event.Remove); // delete host details _hostDetailsDao.deleteDetails(hostId); // if host is GPU enabled, delete GPU entries _hostGpuGroupsDao.deleteGpuEntries(hostId); // delete host tags _hostTagsDao.deleteTags(hostId); host.setGuid(null); final Long clusterId = host.getClusterId(); host.setClusterId(null); _hostDao.update(host.getId(), host); Host hostRemoved = _hostDao.findById(hostId); _hostDao.remove(hostId); if (clusterId != null) { final List hostIds = _hostDao.listIdsByClusterId(clusterId); if (CollectionUtils.isEmpty(hostIds)) { final ClusterVO cluster = _clusterDao.findById(clusterId); cluster.setGuid(null); _clusterDao.update(clusterId, cluster); } } try { resourceStateTransitTo(host, ResourceState.Event.DeleteHost, _nodeId); } catch (final NoTransitionException e) { logger.debug(String.format("Cannot transit %s to Enabled state", host), e); } // Delete the associated entries in host ref table _storagePoolHostDao.deletePrimaryRecordsForHost(hostId); // Make sure any VMs that were marked as being on this host are cleaned up final List vms = _vmDao.listByHostId(hostId); for (final VMInstanceVO vm : vms) { // this is how VirtualMachineManagerImpl does it when it syncs VM states vm.setState(State.Stopped); vm.setHostId(null); _vmDao.persist(vm); } // For pool ids you got, delete local storage host entries in pool table // where for (final StoragePoolHostVO pool : pools) { final Long poolId = pool.getPoolId(); final StoragePoolVO storagePool = _storagePoolDao.findById(poolId); if (storagePool.isLocal() && isForceDeleteStorage) { destroyLocalStoragePoolVolumes(poolId); storagePool.setUuid(null); storagePool.setClusterId(null); _storagePoolDao.update(poolId, storagePool); _storagePoolDao.remove(poolId); logger.debug("Local storage [id: {}] is removed as a part of {} removal", storagePool, hostRemoved); } } // delete the op_host_capacity entry final Object[] capacityTypes = {Capacity.CAPACITY_TYPE_CPU, Capacity.CAPACITY_TYPE_MEMORY, Capacity.CAPACITY_TYPE_CPU_CORE}; final SearchCriteria hostCapacitySC = _capacityDao.createSearchCriteria(); hostCapacitySC.addAnd("hostOrPoolId", 
SearchCriteria.Op.EQ, hostId);
                hostCapacitySC.addAnd("capacityType", SearchCriteria.Op.IN, capacityTypes);
                _capacityDao.remove(hostCapacitySC);

                // remove from dedicated resources
                final DedicatedResourceVO dr = _dedicatedDao.findByHostId(hostId);
                if (dr != null) {
                    _dedicatedDao.remove(dr.getId());
                }

                // Remove comments (if any)
                annotationDao.removeByEntityType(AnnotationService.EntityType.HOST.name(), host.getUuid());
            }
        });

        if (clusterId != null) {
            _agentMgr.notifyMonitorsOfRemovedHost(host.getId(), clusterId);
        }

        return true;
    }

    private void addVolumesToList(List<VolumeVO> volumes, List<VolumeVO> volumesToAdd) {
        if (CollectionUtils.isNotEmpty(volumesToAdd)) {
            volumes.addAll(volumesToAdd);
        }
    }

    protected void destroyLocalStoragePoolVolumes(long poolId) {
        List<VolumeVO> rootDisks = volumeDao.findByPoolId(poolId);
        List<VolumeVO> dataVolumes = volumeDao.findByPoolId(poolId, Volume.Type.DATADISK);
        List<VolumeVO> volumes = new ArrayList<>();
        addVolumesToList(volumes, rootDisks);
        addVolumesToList(volumes, dataVolumes);
        if (CollectionUtils.isNotEmpty(volumes)) {
            for (VolumeVO volume : volumes) {
                volumeDao.updateAndRemoveVolume(volume);
            }
        }
    }

    /**
     * Returns true if host can be deleted.
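     * (Forced deletion via doDeleteHost's isForced flag bypasses this check.)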
* A host can be deleted either if it is in Maintenance or "Degraded" state. */ protected boolean canDeleteHost(HostVO host) { return host.getResourceState() == ResourceState.Maintenance || host.getResourceState() == ResourceState.Degraded; } @Override public boolean deleteHost(final long hostId, final boolean isForced, final boolean isForceDeleteStorage) { try { final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.DeleteHost); if (result != null) { return result; } } catch (final AgentUnavailableException e) { return false; } return doDeleteHost(hostId, isForced, isForceDeleteStorage); } @Override @DB public boolean deleteCluster(final DeleteClusterCmd cmd) { try { Transaction.execute(new TransactionCallbackNoReturn() { @Override public void doInTransactionWithoutResult(final TransactionStatus status) { final ClusterVO cluster = _clusterDao.lockRow(cmd.getId(), true); if (cluster == null) { if (logger.isDebugEnabled()) { logger.debug("Cluster: " + cmd.getId() + " does not even exist. Delete call is ignored."); } throw new CloudRuntimeException("Cluster: " + cmd.getId() + " does not exist"); } final Hypervisor.HypervisorType hypervisorType = cluster.getHypervisorType(); final List hostIds = _hostDao.listIdsByClusterId(cmd.getId()); if (!hostIds.isEmpty()) { logger.debug("{} still has hosts, can't remove", cluster); throw new CloudRuntimeException("Cluster: " + cmd.getId() + " cannot be removed. Cluster still has hosts"); } // don't allow to remove the cluster if it has non-removed storage // pools final List storagePools = _storagePoolDao.listPoolsByCluster(cmd.getId()); if (!storagePools.isEmpty()) { logger.debug("{} still has storage pools, can't remove", cluster); throw new CloudRuntimeException(String.format("Cluster: %s cannot be removed. Cluster still has storage pools", cluster)); } if (_clusterDao.remove(cmd.getId())) { _capacityDao.removeBy(null, null, null, cluster.getId(), null); // If this cluster is of type vmware, and if the nexus vswitch // global parameter setting is turned // on, remove the row in cluster_vsm_map for this cluster id. 
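// Cleanup performed once the cluster row is removed: capacity records for the cluster,
// the cluster_vsm_map row (VMware with Nexus vSwitch enabled only), any dedicated-resource
// entry, and annotations attached to the cluster.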
if (hypervisorType == HypervisorType.VMware && Boolean.parseBoolean(_configDao.getValue(Config.VmwareUseNexusVSwitch.toString()))) { _clusterVSMMapDao.removeByClusterId(cmd.getId()); } // remove from dedicated resources final DedicatedResourceVO dr = _dedicatedDao.findByClusterId(cluster.getId()); if (dr != null) { _dedicatedDao.remove(dr.getId()); } // Remove comments (if any) annotationDao.removeByEntityType(AnnotationService.EntityType.CLUSTER.name(), cluster.getUuid()); } } }); return true; } catch (final CloudRuntimeException e) { throw e; } catch (final Throwable t) { logger.error("Unable to delete cluster: {}", _clusterDao.findById(cmd.getId()), t); return false; } } @Override @DB public Cluster updateCluster(UpdateClusterCmd cmd) { ClusterVO cluster = (ClusterVO) getCluster(cmd.getId()); String clusterType = cmd.getClusterType(); String hypervisor = cmd.getHypervisor(); String allocationState = cmd.getAllocationState(); String managedstate = cmd.getManagedstate(); String name = cmd.getClusterName(); CPU.CPUArch arch = cmd.getArch(); // Verify cluster information and update the cluster if needed boolean doUpdate = false; if (StringUtils.isNotBlank(name)) { if(cluster.getHypervisorType() == HypervisorType.VMware) { throw new InvalidParameterValueException("Renaming VMware cluster is not supported as it could cause problems if the updated cluster name is not mapped on VCenter."); } logger.debug("Updating Cluster name to: " + name); cluster.setName(name); doUpdate = true; } if (hypervisor != null && !hypervisor.isEmpty()) { final Hypervisor.HypervisorType hypervisorType = Hypervisor.HypervisorType.getType(hypervisor); if (hypervisorType == null) { logger.error("Unable to resolve " + hypervisor + " to a valid supported hypervisor type"); throw new InvalidParameterValueException("Unable to resolve " + hypervisor + " to a supported type"); } else { cluster.setHypervisorType(hypervisor); doUpdate = true; } } Cluster.ClusterType newClusterType; if (clusterType != null && !clusterType.isEmpty()) { try { newClusterType = Cluster.ClusterType.valueOf(clusterType); } catch (final IllegalArgumentException ex) { throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); } if (newClusterType == null) { logger.error("Unable to resolve " + clusterType + " to a valid supported cluster type"); throw new InvalidParameterValueException("Unable to resolve " + clusterType + " to a supported type"); } else { cluster.setClusterType(newClusterType); doUpdate = true; } } Grouping.AllocationState newAllocationState; if (allocationState != null && !allocationState.isEmpty()) { try { newAllocationState = Grouping.AllocationState.valueOf(allocationState); } catch (final IllegalArgumentException ex) { throw new InvalidParameterValueException("Unable to resolve Allocation State '" + allocationState + "' to a supported state"); } if (newAllocationState == null) { logger.error("Unable to resolve " + allocationState + " to a valid supported allocation State"); throw new InvalidParameterValueException("Unable to resolve " + allocationState + " to a supported state"); } else { cluster.setAllocationState(newAllocationState); doUpdate = true; } } Managed.ManagedState newManagedState = null; final Managed.ManagedState oldManagedState = cluster.getManagedState(); if (managedstate != null && !managedstate.isEmpty()) { try { newManagedState = Managed.ManagedState.valueOf(managedstate); } catch (final IllegalArgumentException ex) { throw new InvalidParameterValueException("Unable to 
resolve Managed State '" + managedstate + "' to a supported state"); } if (newManagedState == null) { logger.error("Unable to resolve Managed State '" + managedstate + "' to a supported state"); throw new InvalidParameterValueException("Unable to resolve Managed State '" + managedstate + "' to a supported state"); } else { doUpdate = true; } } if (arch != null) { List architectureTypes = _hostDao.listDistinctArchTypes(cluster.getId()); if (architectureTypes.stream().anyMatch(a -> !a.equals(arch))) { throw new InvalidParameterValueException(String.format( "Cluster has host(s) present with arch type(s): %s", StringUtils.join(architectureTypes.stream().map(CPU.CPUArch::getType).toArray()))); } cluster.setArch(arch.getType()); doUpdate = true; } if (doUpdate) { _clusterDao.update(cluster.getId(), cluster); } if (newManagedState != null && !newManagedState.equals(oldManagedState)) { if (newManagedState.equals(Managed.ManagedState.Unmanaged)) { boolean success = false; try { cluster.setManagedState(Managed.ManagedState.PrepareUnmanaged); _clusterDao.update(cluster.getId(), cluster); List hosts = listAllHosts(Host.Type.Routing, cluster.getId(), cluster.getPodId(), cluster.getDataCenterId()); for (final HostVO host : hosts) { if (host.getType().equals(Host.Type.Routing) && !host.getStatus().equals(Status.Down) && !host.getStatus().equals(Status.Disconnected) && !host.getStatus().equals(Status.Up) && !host.getStatus().equals(Status.Alert)) { final String msg = "host " + host.getPrivateIpAddress() + " should not be in " + host.getStatus().toString() + " status"; throw new CloudRuntimeException("PrepareUnmanaged Failed due to " + msg); } } for (final HostVO host : hosts) { if (host.getStatus().equals(Status.Up)) { umanageHost(host.getId()); } } final int retry = 40; boolean lsuccess; for (int i = 0; i < retry; i++) { lsuccess = true; try { Thread.sleep(5 * 1000); } catch (final InterruptedException e) { logger.debug("thread unexpectedly interrupted during wait, while updating cluster"); } hosts = listAllUpAndEnabledHosts(Host.Type.Routing, cluster.getId(), cluster.getPodId(), cluster.getDataCenterId()); for (final HostVO host : hosts) { if (!host.getStatus().equals(Status.Down) && !host.getStatus().equals(Status.Disconnected) && !host.getStatus().equals(Status.Alert)) { lsuccess = false; break; } } if (lsuccess) { success = true; break; } } if (!success) { throw new CloudRuntimeException("PrepareUnmanaged Failed due to some hosts are still in UP status after 5 Minutes, please try later "); } } finally { cluster.setManagedState(success ? 
Managed.ManagedState.Unmanaged : Managed.ManagedState.PrepareUnmanagedError); _clusterDao.update(cluster.getId(), cluster); } } else if (newManagedState.equals(Managed.ManagedState.Managed)) { cluster.setManagedState(Managed.ManagedState.Managed); _clusterDao.update(cluster.getId(), cluster); } } return cluster; } @Override @DB @ActionEvent(eventType = EventTypes.EVENT_MAINTENANCE_CANCEL, eventDescription = "cancel maintenance for host", async = true) public Host cancelMaintenance(final CancelMaintenanceCmd cmd) { final Long hostId = cmd.getId(); // verify input parameters final HostVO host = _hostDao.findById(hostId); if (host == null || host.getRemoved() != null) { throw new InvalidParameterValueException("Host with id " + hostId.toString() + " doesn't exist"); } if (!ResourceState.isMaintenanceState(host.getResourceState())) { throw new CloudRuntimeException(String.format("Cannot perform cancelMaintenance when resource state is %s, host: %s", host.getResourceState(), host)); } processResourceEvent(ResourceListener.EVENT_CANCEL_MAINTENANCE_BEFORE, hostId); final boolean success = cancelMaintenance(hostId); processResourceEvent(ResourceListener.EVENT_CANCEL_MAINTENANCE_AFTER, hostId); if (!success) { throw new CloudRuntimeException("Internal error cancelling maintenance."); } return host; } @Override @DB @ActionEvent(eventType = EventTypes.EVENT_HOST_RECONNECT, eventDescription = "reconnecting host", async = true) public Host reconnectHost(ReconnectHostCmd cmd) throws AgentUnavailableException { Long hostId = cmd.getId(); HostVO host = _hostDao.findById(hostId); if (host == null) { throw new InvalidParameterValueException("Host with id " + hostId.toString() + " doesn't exist"); } _agentMgr.reconnect(hostId); return host; } @Override public boolean resourceStateTransitTo(final Host host, final ResourceState.Event event, final long msId) throws NoTransitionException { final ResourceState currentState = host.getResourceState(); final ResourceState nextState = currentState.getNextState(event); if (nextState == null) { throw new NoTransitionException("No next resource state found for current state = " + currentState + " event = " + event); } // TO DO - Make it more granular and have better conversion into capacity type if(host.getType() == Type.Routing){ final CapacityState capacityState = nextState == ResourceState.Enabled ? CapacityState.Enabled : CapacityState.Disabled; final short[] capacityTypes = { Capacity.CAPACITY_TYPE_CPU, Capacity.CAPACITY_TYPE_MEMORY, Capacity.CAPACITY_TYPE_CPU_CORE }; _capacityDao.updateCapacityState(null, null, null, host.getId(), capacityState.toString(), capacityTypes); final StoragePoolVO storagePool = _storageMgr.findLocalStorageOnHost(host.getId()); if(storagePool != null){ final short[] capacityTypesLocalStorage = {Capacity.CAPACITY_TYPE_LOCAL_STORAGE}; _capacityDao.updateCapacityState(null, null, null, storagePool.getId(), capacityState.toString(), capacityTypesLocalStorage); } } return _hostDao.updateResourceState(currentState, event, nextState, host); } private void handleVmForLastHostOrWithVGpu(final HostVO host, final VMInstanceVO vm) { // Migration is not supported for VGPU Vms so stop them. // for the last host in this cluster, destroy SSVM/CPVM and stop all other VMs if (VirtualMachine.Type.SecondaryStorageVm.equals(vm.getType()) || VirtualMachine.Type.ConsoleProxy.equals(vm.getType())) { logger.error("Maintenance: VM is of type {}. 
Destroying VM {} immediately instead of migration.", vm.getType(), vm); _haMgr.scheduleDestroy(vm, host.getId(), HighAvailabilityManager.ReasonType.HostMaintenance); return; } logger.error("Maintenance: No hosts available for migrations. Scheduling shutdown for VM {} instead of migration.", vm); _haMgr.scheduleStop(vm, host.getId(), WorkType.ForceStop); } private boolean doMaintain(final long hostId) { final HostVO host = _hostDao.findById(hostId); logger.info("Maintenance: attempting maintenance of host {}", host); ResourceState hostState = host.getResourceState(); if (!ResourceState.canAttemptMaintenance(hostState)) { throw new CloudRuntimeException(String.format("Cannot perform maintain when resource state is %s, host = %s", hostState, host)); } final List vms = _vmDao.listByHostId(hostId); if (CollectionUtils.isNotEmpty(vms) && !HighAvailabilityManagerImpl.VmHaEnabled.valueIn(host.getDataCenterId())) { throw new CloudRuntimeException(String.format("Cannot perform maintain for the host %s (%d) as there are running VMs on it and VM high availability manager is disabled", host.getName(), hostId)); } final MaintainAnswer answer = (MaintainAnswer)_agentMgr.easySend(hostId, new MaintainCommand()); if (answer == null || !answer.getResult()) { logger.warn("Unable to send MaintainCommand to host: {}", host); return false; } try { resourceStateTransitTo(host, ResourceState.Event.AdminAskMaintenance, _nodeId); } catch (final NoTransitionException e) { final String err = String.format("Cannot transit resource state of %s to %s", host, ResourceState.Maintenance); logger.debug(err, e); throw new CloudRuntimeException(err + e.getMessage()); } ActionEventUtils.onStartedActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), EventTypes.EVENT_MAINTENANCE_PREPARE, String.format("starting maintenance for host %s", host), hostId, null, true, 0); _agentMgr.pullAgentToMaintenance(hostId); /* TODO: move below to listener */ if (host.getType() == Host.Type.Routing) { if (vms.isEmpty()) { return true; } List hosts = listAllUpAndEnabledHosts(Host.Type.Routing, host.getClusterId(), host.getPodId(), host.getDataCenterId()); if (CollectionUtils.isEmpty(hosts)) { logger.warn("Unable to find a host for vm migration in cluster: {}", _clusterDao.findById(host.getClusterId())); if (! isClusterWideMigrationPossible(host, vms, hosts)) { return false; } } for (final VMInstanceVO vm : vms) { if (hosts == null || hosts.isEmpty() || !answer.getMigrate() || _serviceOfferingDetailsDao.findDetail(vm.getServiceOfferingId(), GPU.Keys.vgpuType.toString()) != null) { handleVmForLastHostOrWithVGpu(host, vm); } else if (HypervisorType.LXC.equals(host.getHypervisorType()) && VirtualMachine.Type.User.equals(vm.getType())){ //Migration is not supported for LXC Vms. Schedule restart instead. _haMgr.scheduleRestart(vm, false, HighAvailabilityManager.ReasonType.HostMaintenance); } else if (userVmManager.isVMUsingLocalStorage(vm)) { if (isMaintenanceLocalStrategyForceStop()) { _haMgr.scheduleStop(vm, hostId, WorkType.ForceStop, HighAvailabilityManager.ReasonType.HostMaintenance); } else if (isMaintenanceLocalStrategyMigrate()) { migrateAwayVmWithVolumes(host, vm); } else if (!isMaintenanceLocalStrategyDefault()){ String logMessage = String.format( "Unsupported host.maintenance.local.storage.strategy: %s. 
Please set a strategy according to the global settings description: " + "'Error', 'Migration', or 'ForceStop'.", HOST_MAINTENANCE_LOCAL_STRATEGY.value()); logger.error(logMessage); throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage."); } } else { logger.info("Maintenance: scheduling migration of VM {} from host {}", vm, host); _haMgr.scheduleMigration(vm, HighAvailabilityManager.ReasonType.HostMaintenance); } } } return true; } private boolean isClusterWideMigrationPossible(Host host, List vms, List hosts) { if (MIGRATE_VM_ACROSS_CLUSTERS.valueIn(host.getDataCenterId())) { DataCenterVO zone = _dcDao.findById(host.getDataCenterId()); logger.info("Looking for hosts across different clusters in zone: {}", zone); Long podId = null; for (final VMInstanceVO vm : vms) { if (VirtualMachine.systemVMs.contains(vm.getType())) { // SystemVMs can only be migrated to same pod podId = host.getPodId(); break; } } hosts.addAll(listAllUpAndEnabledHosts(Host.Type.Routing, null, podId, host.getDataCenterId())); if (CollectionUtils.isEmpty(hosts)) { logger.warn("Unable to find a host for vm migration in zone: {}", zone); return false; } logger.info("Found hosts in the zone for vm migration: " + hosts); if (HypervisorType.VMware.equals(host.getHypervisorType())) { logger.debug("Skipping pool check of volumes on VMware environment because across-cluster vm migration is supported by vMotion"); return true; } // Don't migrate vm if it has volumes on cluster-wide pool for (final VMInstanceVO vm : vms) { if (_vmMgr.checkIfVmHasClusterWideVolumes(vm.getId())) { logger.warn(String.format("VM %s cannot be migrated across cluster as it has volumes on cluster-wide pool", vm)); return false; } } } else { logger.warn(String.format("VMs cannot be migrated across cluster since %s is false for zone ID: %d", MIGRATE_VM_ACROSS_CLUSTERS.key(), host.getDataCenterId())); return false; } return true; } /** * Looks for Hosts able to allocate the VM and migrates the VM with its volume. 
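 * The deployment plan is marked as a migration plan and the source host is added to the
 * planner's exclude list, so the planner picks a different host that can allocate the VM;
 * the VM and its volumes are then moved with migrateWithStorage.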
*/ private void migrateAwayVmWithVolumes(HostVO host, VMInstanceVO vm) { final DataCenterDeployment plan = new DataCenterDeployment(host.getDataCenterId(), host.getPodId(), host.getClusterId(), null, null, null); ServiceOfferingVO offeringVO = serviceOfferingDao.findById(vm.getServiceOfferingId()); final VirtualMachineProfile profile = new VirtualMachineProfileImpl(vm, null, offeringVO, null, null); plan.setMigrationPlan(true); DeployDestination dest = getDeployDestination(vm, profile, plan, host); Host destHost = dest.getHost(); try { _vmMgr.migrateWithStorage(vm.getUuid(), host.getId(), destHost.getId(), null); } catch (ResourceUnavailableException e) { throw new CloudRuntimeException(String.format("Maintenance failed, could not migrate VM (%s) with local storage from host (%s) to host (%s).", vm, host, destHost), e); } } private DeployDestination getDeployDestination(VMInstanceVO vm, VirtualMachineProfile profile, DataCenterDeployment plan, HostVO hostToAvoid) { DeployDestination dest; DeploymentPlanner.ExcludeList avoids = new DeploymentPlanner.ExcludeList(); avoids.addHost(hostToAvoid.getId()); try { dest = deploymentManager.planDeployment(profile, plan, avoids, null); } catch (InsufficientServerCapacityException e) { throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s].", vm.getId(), vm.getInstanceName()), e); } if (dest == null) { throw new CloudRuntimeException(String.format("Maintenance failed, could not find deployment destination for VM [id=%s, name=%s], using plan: %s.", vm.getId(), vm.getInstanceName(), plan)); } return dest; } @Override public boolean maintain(final long hostId) throws AgentUnavailableException { final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminAskMaintenance); if (result != null) { return result; } return doMaintain(hostId); } @Override @DB @ActionEvent(eventType = EventTypes.EVENT_MAINTENANCE_PREPARE, eventDescription = "prepare maintenance for host", async = true) public Host maintain(final PrepareForMaintenanceCmd cmd) { final Long hostId = cmd.getId(); final HostVO host = _hostDao.findById(hostId); if (host == null) { logger.debug("Unable to find host " + hostId); throw new InvalidParameterValueException("Unable to find host with ID: " + hostId + ". Please specify a valid host ID."); } if (!ResourceState.canAttemptMaintenance(host.getResourceState())) { throw new CloudRuntimeException("Host is already in state " + host.getResourceState() + ". Cannot recall for maintenance until resolved."); } if (SET_HOST_DOWN_TO_MAINTENANCE.valueIn(host.getDataCenterId()) && (host.getStatus() == Status.Down)) { if (host.getResourceState() == ResourceState.Enabled) { _hostDao.updateResourceState(ResourceState.Enabled, ResourceState.Event.AdminAskMaintenance, ResourceState.PrepareForMaintenance, host); _hostDao.updateResourceState(ResourceState.PrepareForMaintenance, ResourceState.Event.InternalEnterMaintenance, ResourceState.Maintenance, host); return _hostDao.findById(hostId); } else if (host.getResourceState() == ResourceState.ErrorInMaintenance) { _hostDao.updateResourceState(ResourceState.ErrorInMaintenance, ResourceState.Event.InternalEnterMaintenance, ResourceState.Maintenance, host); return _hostDao.findById(hostId); } } if (_hostDao.countBy(host.getClusterId(), ResourceState.PrepareForMaintenance, ResourceState.ErrorInPrepareForMaintenance) > 0) { throw new CloudRuntimeException(String.format("There are other servers attempting migrations for maintenance. 
" + "Found hosts in PrepareForMaintenance OR ErrorInPrepareForMaintenance STATUS in cluster %s", _clusterDao.findById(host.getClusterId()))); } if (_storageMgr.isLocalStorageActiveOnHost(host.getId())) { if(!isMaintenanceLocalStrategyMigrate() && !isMaintenanceLocalStrategyForceStop()) { throw new CloudRuntimeException("There are active VMs using the host's local storage pool. Please stop all VMs on this host that use local storage."); } } List migratingInVMs = _vmDao.findByHostInStates(hostId, State.Migrating); if (!migratingInVMs.isEmpty()) { throw new CloudRuntimeException("Host contains incoming VMs migrating. Please wait for them to complete before putting to maintenance."); } if (!_vmDao.findByHostInStates(hostId, State.Starting, State.Stopping).isEmpty()) { throw new CloudRuntimeException("Host contains VMs in starting/stopping state. Please wait for them to complete before putting to maintenance."); } if (!_vmDao.findByHostInStates(hostId, State.Error, State.Unknown).isEmpty()) { throw new CloudRuntimeException("Host contains VMs in error/unknown/shutdown state. Please fix errors to proceed."); } try { processResourceEvent(ResourceListener.EVENT_PREPARE_MAINTENANCE_BEFORE, hostId); if (maintain(hostId)) { processResourceEvent(ResourceListener.EVENT_PREPARE_MAINTENANCE_AFTER, hostId); return _hostDao.findById(hostId); } else { throw new CloudRuntimeException(String.format("Unable to prepare for maintenance host %s", host)); } } catch (final AgentUnavailableException e) { throw new CloudRuntimeException(String.format("Unable to prepare for maintenance host %s", host)); } } protected boolean isMaintenanceLocalStrategyMigrate() { if(StringUtils.isBlank(HOST_MAINTENANCE_LOCAL_STRATEGY.value())) { return false; } return HOST_MAINTENANCE_LOCAL_STRATEGY.value().equalsIgnoreCase(WorkType.Migration.toString()); } protected boolean isMaintenanceLocalStrategyForceStop() { if(StringUtils.isBlank(HOST_MAINTENANCE_LOCAL_STRATEGY.value())) { return false; } return HOST_MAINTENANCE_LOCAL_STRATEGY.value().equalsIgnoreCase(WorkType.ForceStop.toString()); } /** * Returns true if the host.maintenance.local.storage.strategy is the Default: "Error", blank, empty, or null. */ protected boolean isMaintenanceLocalStrategyDefault() { return StringUtils.isBlank(HOST_MAINTENANCE_LOCAL_STRATEGY.value()) || HOST_MAINTENANCE_LOCAL_STRATEGY.value().equalsIgnoreCase(State.Error.toString()); } /** * Declares host as Degraded. This method is used in critical situations; e.g. if it is not possible to start host, not even via out-of-band. 
*/ @Override public Host declareHostAsDegraded(final DeclareHostAsDegradedCmd cmd) throws NoTransitionException { Long hostId = cmd.getId(); HostVO host = _hostDao.findById(hostId); if (host == null || StringUtils.isBlank(host.getName())) { throw new InvalidParameterValueException(String.format("Host [id:%s] does not exist.", hostId)); } else if (host.getRemoved() != null) { throw new InvalidParameterValueException(String.format("Host [id:%s, uuid: %s, name:%s] does not exist or it has been removed.", hostId, host.getUuid(), host.getName())); } if (host.getResourceState() == ResourceState.Degraded) { throw new NoTransitionException(String.format("Host (%s) was already marked as Degraded.", host)); } if (host.getStatus() != Status.Alert && host.getStatus() != Status.Disconnected) { throw new InvalidParameterValueException(String.format("Cannot declare host (%s) as 'Degraded' when host is in %s status", host, host.getStatus())); } try { resourceStateTransitTo(host, ResourceState.Event.DeclareHostDegraded, _nodeId); host.setResourceState(ResourceState.Degraded); } catch (NoTransitionException e) { logger.error("Cannot transit host [id:{}, uuid: {}, name:{}, state:{}, status:{}] to {} state", host.getId(), host.getUuid(), host.getName(), host.getState(), host.getStatus(), ResourceState.Event.DeclareHostDegraded, e); throw e; } scheduleVmsRestart(host); return host; } /** * This method assumes that the host is Degraded; therefore it schedules VMs to be restarted by the HA manager. */ private void scheduleVmsRestart(Host host) { List<VMInstanceVO> allVmsOnHost = _vmDao.listByHostId(host.getId()); if (CollectionUtils.isEmpty(allVmsOnHost)) { logger.debug("Host ({}) was marked as Degraded with no allocated VMs, no need to schedule VM restart", host); return; } logger.debug("Host ({}) was marked as Degraded with a total of {} allocated VMs. Triggering HA to start VMs that have HA enabled.", host, allVmsOnHost.size()); for (VMInstanceVO vm : allVmsOnHost) { State vmState = vm.getState(); if (vmState == State.Starting || vmState == State.Running || vmState == State.Stopping) { _haMgr.scheduleRestart(vm, false, HighAvailabilityManager.ReasonType.HostDegraded); } } } /** * Changes a host from 'Degraded' to 'Enabled' ResourceState. 
*/ @Override public Host cancelHostAsDegraded(final CancelHostAsDegradedCmd cmd) throws NoTransitionException { Long hostId = cmd.getId(); HostVO host = _hostDao.findById(hostId); if (host == null || host.getRemoved() != null) { throw new InvalidParameterValueException(String.format("Host (%s) with id %d does not exist", host, hostId)); } if (host.getResourceState() != ResourceState.Degraded) { throw new NoTransitionException( String.format("Cannot perform cancelHostAsDegraded on host (%s) when host is in %s state", host, host.getResourceState())); } try { resourceStateTransitTo(host, ResourceState.Event.EnableDegradedHost, _nodeId); host.setResourceState(ResourceState.Enabled); } catch (NoTransitionException e) { throw new NoTransitionException( String.format("Cannot transit host (id=%s, uuid=%s, name=%s, state=%s, status=%s) to %s state", host.getId(), host.getUuid(), host.getName(), host.getResourceState(), host.getStatus(), ResourceState.Enabled)); } return host; } /** * Adds VNC details as user VM details for each VM in 'vms' (KVM hosts only). */ protected void setKVMVncAccess(long hostId, List<VMInstanceVO> vms) { for (VMInstanceVO vm : vms) { GetVncPortAnswer vmVncPortAnswer = (GetVncPortAnswer) _agentMgr.easySend(hostId, new GetVncPortCommand(vm.getId(), vm.getInstanceName())); if (vmVncPortAnswer != null) { userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.KVM_VNC_ADDRESS, vmVncPortAnswer.getAddress(), true); userVmDetailsDao.addDetail(vm.getId(), VmDetailConstants.KVM_VNC_PORT, String.valueOf(vmVncPortAnswer.getPort()), true); } } } /** * Configures VNC access for host VMs which have failed migrating to another host while trying to enter Maintenance mode. */ protected void configureVncAccessForKVMHostFailedMigrations(HostVO host, List<VMInstanceVO> failedMigrations) { if (host.getHypervisorType().equals(HypervisorType.KVM)) { _agentMgr.pullAgentOutMaintenance(host.getId()); setKVMVncAccess(host.getId(), failedMigrations); _agentMgr.pullAgentToMaintenance(host.getId()); } } /** * Safely transits the host into Maintenance mode. */ protected boolean setHostIntoMaintenance(HostVO host) throws NoTransitionException { logger.debug("Host {} entering Maintenance mode", host); resourceStateTransitTo(host, ResourceState.Event.InternalEnterMaintenance, _nodeId); ActionEventUtils.onCompletedActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), EventVO.LEVEL_INFO, EventTypes.EVENT_MAINTENANCE_PREPARE, String.format("completed maintenance for host %s", host), host.getId(), null, 0); return true; } /** * Sets the host into ErrorInMaintenance state, as errors occurred during VM migrations. 
Do the following: * - Cancel scheduled migrations for those which have already failed * - Configure VNC access for VMs (KVM hosts only) */ protected boolean setHostIntoErrorInMaintenance(HostVO host, List<VMInstanceVO> errorVms) throws NoTransitionException { logger.debug("Unable to migrate / fix errors for {} VM(s) from host {}", errorVms.size(), host); _haMgr.cancelScheduledMigrations(host); configureVncAccessForKVMHostFailedMigrations(host, errorVms); resourceStateTransitTo(host, ResourceState.Event.UnableToMaintain, _nodeId); return false; } protected boolean setHostIntoErrorInPrepareForMaintenance(HostVO host, List<VMInstanceVO> errorVms) throws NoTransitionException { logger.debug("Host {} entering PrepareForMaintenanceWithErrors state", host); configureVncAccessForKVMHostFailedMigrations(host, errorVms); resourceStateTransitTo(host, ResourceState.Event.UnableToMigrate, _nodeId); return false; } protected boolean setHostIntoPrepareForMaintenanceAfterErrorsFixed(HostVO host) throws NoTransitionException { logger.debug("Host {} entering PrepareForMaintenance state as previous errors have been corrected", host); resourceStateTransitTo(host, ResourceState.Event.ErrorsCorrected, _nodeId); return false; } /** * Returns true if the host goes into Maintenance mode. There are various possibilities for VMs' states * on a host. We need to track the various VM states on each run and accordingly transit to the * appropriate state. * We change states as follows - * 1. If there are no VMs in running, migrating, starting, stopping, error, unknown states we can move * to maintenance state. Note that there cannot be incoming migrations as the prepare for * maintenance API call checks incoming migrations before starting. * 2. If there are errors (like migrating VMs, error VMs, etc.) we mark the host as ErrorInPrepareForMaintenance but * don't stop remaining migrations/ongoing legitimate operations. * 3. If all migration retries and legitimate operations have finished, we check for VMs on the host and if * there are still VMs in error state or in running state or failed migrations, we mark the host as * ErrorInMaintenance. * 4. Lastly, if there are no errors or failed migrations or running VMs but there are still pending * legitimate operations and the host was in ErrorInPrepareForMaintenance, we push the host back * to PrepareForMaintenance state. */ protected boolean attemptMaintain(HostVO host) throws NoTransitionException { final long hostId = host.getId(); logger.info(String.format("Attempting maintenance for %s", host)); // Step 0: First gather if VMs have pending HA work for migration with retries left. final List<VMInstanceVO> allVmsOnHost = _vmDao.listByHostId(hostId); final boolean hasMigratingAwayVms = CollectionUtils.isNotEmpty(_vmDao.listVmsMigratingFromHost(hostId)); boolean hasPendingMigrationRetries = false; for (VMInstanceVO vmInstanceVO : allVmsOnHost) { if (_haMgr.hasPendingMigrationsWork(vmInstanceVO.getId())) { logger.info(String.format("Attempting maintenance for %s found pending migration for %s.", host, vmInstanceVO)); hasPendingMigrationRetries = true; break; } } // Step 1: If there are no VMs in migrating, running, starting, stopping, error or unknown state we can safely move the host to maintenance. 
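// Incoming migrations were already rejected by the prepare-for-maintenance API call (see the Javadoc above), so an empty result here means no VM is in an active or transitional state and the host can transit straight to Maintenance.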
if (!hasMigratingAwayVms && CollectionUtils.isEmpty(_vmDao.findByHostInStates(host.getId(), State.Migrating, State.Running, State.Starting, State.Stopping, State.Error, State.Unknown))) { if (hasPendingMigrationRetries) { logger.error("There should not be pending migration retries for VMs on this host as there are no VMs in running, migrating, " + "starting, stopping, error or unknown state on host " + host); } return setHostIntoMaintenance(host); } // Step 2: Gather the relevant VM states on the host; based on them we determine the next host state. final List<VMInstanceVO> failedMigrations = new ArrayList<>(_vmDao.listNonMigratingVmsByHostEqualsLastHost(hostId)); final List<VMInstanceVO> errorVms = new ArrayList<>(_vmDao.findByHostInStates(hostId, State.Unknown, State.Error)); final boolean hasRunningVms = CollectionUtils.isNotEmpty(_vmDao.findByHostInStates(hostId, State.Running)); final boolean hasFailedMigrations = CollectionUtils.isNotEmpty(failedMigrations); final boolean hasVmsInFailureStates = CollectionUtils.isNotEmpty(errorVms); final boolean hasStoppingVms = CollectionUtils.isNotEmpty(_vmDao.findByHostInStates(hostId, State.Stopping)); errorVms.addAll(failedMigrations); // Step 3: If there are no pending migration retries but the host still has running VMs, or // the host has VMs in failure states / failed migrations, we move the host to ErrorInMaintenance state. if ((!hasPendingMigrationRetries && !hasMigratingAwayVms && hasRunningVms) || (!hasRunningVms && !hasMigratingAwayVms && hasVmsInFailureStates)) { return setHostIntoErrorInMaintenance(host, errorVms); } // Step 4: If there are pending migrations, ongoing retries left or stopping VMs, and there were errors or failed // migrations, we put the host into ErrorInPrepareForMaintenance. if ((hasPendingMigrationRetries || hasMigratingAwayVms || hasStoppingVms) && (hasVmsInFailureStates || hasFailedMigrations)) { return setHostIntoErrorInPrepareForMaintenance(host, errorVms); } // Step 5: If errors were previously found but are no longer present, the operator has fixed them, so we push // the host back into PrepareForMaintenance state. 
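// Note: returning false from the branches above leaves the host in its current resource state, so a subsequent checkAndMaintain() call can re-evaluate it once the pending operations settle.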
if (host.getResourceState() == ResourceState.ErrorInPrepareForMaintenance) { return setHostIntoPrepareForMaintenanceAfterErrorsFixed(host); } return false; } @Override public boolean checkAndMaintain(final long hostId) { boolean hostInMaintenance = false; final HostVO host = _hostDao.findById(hostId); try { if (host.getType() != Host.Type.Storage) { hostInMaintenance = attemptMaintain(host); } } catch (final NoTransitionException e) { logger.warn(String.format("Cannot transit %s from %s to Maintenance state.", host, host.getResourceState()), e); } return hostInMaintenance; } private ResourceState.Event getResourceEventFromAllocationStateString(String allocationState) { final ResourceState.Event resourceEvent = ResourceState.Event.toEvent(allocationState); if (resourceEvent != ResourceState.Event.Enable && resourceEvent != ResourceState.Event.Disable) { throw new InvalidParameterValueException(String.format("Invalid allocation state: %s, " + "only Enable/Disable are allowed", allocationState)); } return resourceEvent; } private void handleAutoEnableDisableKVMHost(boolean autoEnableDisableKVMSetting, boolean isUpdateFromHostHealthCheck, HostVO host, DetailVO hostDetail, ResourceState.Event resourceEvent) { if (autoEnableDisableKVMSetting) { if (!isUpdateFromHostHealthCheck && hostDetail != null && !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable) { hostDetail.setValue(Boolean.TRUE.toString()); _hostDetailsDao.update(hostDetail.getId(), hostDetail); } else if (!isUpdateFromHostHealthCheck && hostDetail != null && Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Disable) { logger.info("The setting {} is enabled but {} is manually set into {} state," + "ignoring future auto enabling of the host based on health check results", AgentManager.EnableKVMAutoEnableDisable.key(), host, resourceEvent); hostDetail.setValue(Boolean.FALSE.toString()); _hostDetailsDao.update(hostDetail.getId(), hostDetail); } else if (hostDetail == null) { String autoEnableValue = !isUpdateFromHostHealthCheck ? 
Boolean.FALSE.toString() : Boolean.TRUE.toString(); hostDetail = new DetailVO(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST, autoEnableValue); _hostDetailsDao.persist(hostDetail); } } } private boolean updateHostAllocationState(HostVO host, String allocationState, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { boolean autoEnableDisableKVMSetting = AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()) && host.getHypervisorType() == HypervisorType.KVM; ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); DetailVO hostDetail = _hostDetailsDao.findDetail(host.getId(), ApiConstants.AUTO_ENABLE_KVM_HOST); if ((host.getResourceState() == ResourceState.Enabled && resourceEvent == ResourceState.Event.Enable) || (host.getResourceState() == ResourceState.Disabled && resourceEvent == ResourceState.Event.Disable)) { logger.info(String.format("The host %s is already on the allocated state", host.getName())); return false; } if (isAutoEnableAttemptForADisabledHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, hostDetail, resourceEvent)) { logger.debug(String.format("The setting '%s' is enabled and the health check succeeds on the host, " + "but the host has been manually disabled previously, ignoring auto enabling", AgentManager.EnableKVMAutoEnableDisable.key())); return false; } handleAutoEnableDisableKVMHost(autoEnableDisableKVMSetting, isUpdateFromHostHealthCheck, host, hostDetail, resourceEvent); resourceStateTransitTo(host, resourceEvent, _nodeId); return true; } private boolean isAutoEnableAttemptForADisabledHost(boolean autoEnableDisableKVMSetting, boolean isUpdateFromHostHealthCheck, DetailVO hostDetail, ResourceState.Event resourceEvent) { return autoEnableDisableKVMSetting && isUpdateFromHostHealthCheck && hostDetail != null && !Boolean.parseBoolean(hostDetail.getValue()) && resourceEvent == ResourceState.Event.Enable; } private void updateHostName(HostVO host, String name) { logger.debug("Updating Host name to: " + name); host.setName(name); _hostDao.update(host.getId(), host); } private void updateHostGuestOSCategory(Long hostId, Long guestOSCategoryId) { // Verify that the guest OS Category exists if (!(guestOSCategoryId > 0) || _guestOSCategoryDao.findById(guestOSCategoryId) == null) { throw new InvalidParameterValueException("Please specify a valid guest OS category."); } final GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); final DetailVO guestOSDetail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); if (guestOSCategory != null && !GuestOSCategoryVO.CATEGORY_NONE.equalsIgnoreCase(guestOSCategory.getName())) { // Create/Update an entry for guest.os.category.id if (guestOSDetail != null) { guestOSDetail.setValue(String.valueOf(guestOSCategory.getId())); _hostDetailsDao.update(guestOSDetail.getId(), guestOSDetail); } else { final Map detail = new HashMap<>(); detail.put("guest.os.category.id", String.valueOf(guestOSCategory.getId())); _hostDetailsDao.persist(hostId, detail); } } else { // Delete any existing entry for guest.os.category.id if (guestOSDetail != null) { _hostDetailsDao.remove(guestOSDetail.getId()); } } } private void updateHostTags(HostVO host, Long hostId, List hostTags, Boolean isTagARule) { List activeVMs = _vmDao.listByHostId(hostId); logger.warn(String.format("The following active VMs [%s] are using the host [%s]. 
" + "Updating the host tags will not affect them.", activeVMs, host)); if (logger.isDebugEnabled()) { logger.debug("Updating Host Tags to :" + hostTags); } _hostTagsDao.persist(hostId, new ArrayList<>(new HashSet<>(hostTags)), isTagARule); } @Override public Host updateHost(final UpdateHostCmd cmd) throws NoTransitionException { return updateHost(cmd.getId(), cmd.getName(), cmd.getOsCategoryId(), cmd.getAllocationState(), cmd.getUrl(), cmd.getHostTags(), cmd.getIsTagARule(), cmd.getAnnotation(), false); } private Host updateHost(Long hostId, String name, Long guestOSCategoryId, String allocationState, String url, List hostTags, Boolean isTagARule, String annotation, boolean isUpdateFromHostHealthCheck) throws NoTransitionException { // Verify that the host exists final HostVO host = _hostDao.findById(hostId); if (host == null) { throw new InvalidParameterValueException("Host with id " + hostId + " doesn't exist"); } boolean isUpdateHostAllocation = false; if (StringUtils.isNotBlank(allocationState)) { isUpdateHostAllocation = updateHostAllocationState(host, allocationState, isUpdateFromHostHealthCheck); } if (StringUtils.isNotBlank(name)) { updateHostName(host, name); } if (guestOSCategoryId != null) { updateHostGuestOSCategory(hostId, guestOSCategoryId); } if (hostTags != null) { updateHostTags(host, hostId, hostTags, isTagARule); } if (url != null) { _storageMgr.updateSecondaryStorage(hostId, url); } try { _storageMgr.enableHost(hostId); } catch (StorageUnavailableException | StorageConflictException e) { logger.error(String.format("Failed to setup host %s when enabled", host)); } final HostVO updatedHost = _hostDao.findById(hostId); sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(host, allocationState, isUpdateFromHostHealthCheck, isUpdateHostAllocation, annotation); return updatedHost; } private void sendAlertAndAnnotationForAutoEnableDisableKVMHostFeature(HostVO host, String allocationState, boolean isUpdateFromHostHealthCheck, boolean isUpdateHostAllocation, String annotation) { boolean isAutoEnableDisableKVMSettingEnabled = host.getHypervisorType() == HypervisorType.KVM && AgentManager.EnableKVMAutoEnableDisable.valueIn(host.getClusterId()); if (!isAutoEnableDisableKVMSettingEnabled) { if (StringUtils.isNotBlank(annotation)) { annotationService.addAnnotation(annotation, AnnotationService.EntityType.HOST, host.getUuid(), true); } return; } if (!isUpdateHostAllocation) { return; } String msg = String.format("The host %s (%s) ", host.getName(), host.getUuid()); ResourceState.Event resourceEvent = getResourceEventFromAllocationStateString(allocationState); boolean isEventEnable = resourceEvent == ResourceState.Event.Enable; if (isUpdateFromHostHealthCheck) { msg += String.format("is auto-%s after %s health check results", isEventEnable ? "enabled" : "disabled", isEventEnable ? "successful" : "failed"); alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), msg, msg); } else { msg += String.format("is %s despite the setting '%s' is enabled for the cluster %s", isEventEnable ? 
"enabled" : "disabled", AgentManager.EnableKVMAutoEnableDisable.key(), host.getClusterId()); if (StringUtils.isNotBlank(annotation)) { msg += String.format(", reason: %s", annotation); } } annotationService.addAnnotation(msg, AnnotationService.EntityType.HOST, host.getUuid(), true); } @Override public Host autoUpdateHostAllocationState(Long hostId, ResourceState.Event resourceEvent) throws NoTransitionException { return updateHost(hostId, null, null, resourceEvent.toString(), null, null, null, null, true); } @Override public Cluster getCluster(final Long clusterId) { return _clusterDao.findById(clusterId); } @Override public DataCenter getZone(Long zoneId) { return _dcDao.findById(zoneId); } @Override public boolean configure(final String name, final Map params) throws ConfigurationException { _defaultSystemVMHypervisor = HypervisorType.getType(_configDao.getValue(Config.SystemVMDefaultHypervisor.toString())); _gson = GsonHelper.getGson(); _hypervisorsInDC = _hostDao.createSearchBuilder(String.class); _hypervisorsInDC.select(null, Func.DISTINCT, _hypervisorsInDC.entity().getHypervisorType()); _hypervisorsInDC.and("hypervisorType", _hypervisorsInDC.entity().getHypervisorType(), SearchCriteria.Op.NNULL); _hypervisorsInDC.and("dataCenter", _hypervisorsInDC.entity().getDataCenterId(), SearchCriteria.Op.EQ); _hypervisorsInDC.and("id", _hypervisorsInDC.entity().getId(), SearchCriteria.Op.NEQ); _hypervisorsInDC.and("type", _hypervisorsInDC.entity().getType(), SearchCriteria.Op.EQ); _hypervisorsInDC.done(); _gpuAvailability = _hostGpuGroupsDao.createSearchBuilder(); _gpuAvailability.and("hostId", _gpuAvailability.entity().getHostId(), Op.EQ); _gpuAvailability.and("groupName", _gpuAvailability.entity().getGroupName(), Op.EQ); final SearchBuilder join1 = _vgpuTypesDao.createSearchBuilder(); join1.and("vgpuType", join1.entity().getVgpuType(), Op.EQ); join1.and("remainingCapacity", join1.entity().getRemainingCapacity(), Op.GT); _gpuAvailability.join("groupId", join1, _gpuAvailability.entity().getId(), join1.entity().getGpuGroupId(), JoinBuilder.JoinType.INNER); _gpuAvailability.done(); return true; } @Override public List getSupportedHypervisorTypes(final long zoneId, final boolean forVirtualRouter, final Long podId) { final List hypervisorTypes = new ArrayList<>(); List clustersForZone; if (podId != null) { clustersForZone = _clusterDao.listByPodId(podId); } else { clustersForZone = _clusterDao.listByZoneId(zoneId); } for (final ClusterVO cluster : clustersForZone) { final HypervisorType hType = cluster.getHypervisorType(); if (!forVirtualRouter || (hType != HypervisorType.BareMetal && hType != HypervisorType.Ovm)) { hypervisorTypes.add(hType); } } return hypervisorTypes; } @Override public HypervisorType getDefaultHypervisor(final long zoneId) { HypervisorType defaultHyper = HypervisorType.None; if (_defaultSystemVMHypervisor != HypervisorType.None) { defaultHyper = _defaultSystemVMHypervisor; } final DataCenterVO dc = _dcDao.findById(zoneId); if (dc == null) { return HypervisorType.None; } _dcDao.loadDetails(dc); final String defaultHypervisorInZone = dc.getDetail("defaultSystemVMHypervisorType"); if (defaultHypervisorInZone != null) { defaultHyper = HypervisorType.getType(defaultHypervisorInZone); } final List systemTemplates = _templateDao.listAllSystemVMTemplates(); boolean isValid = false; for (final VMTemplateVO template : systemTemplates) { if (template.getHypervisorType() == defaultHyper) { isValid = true; break; } } if (isValid) { final List clusters = _clusterDao.listByDcHyType(zoneId, 
defaultHyper.toString()); if (clusters.isEmpty()) { isValid = false; } } if (isValid) { return defaultHyper; } else { return HypervisorType.None; } } @Override public HypervisorType getAvailableHypervisor(final long zoneId) { HypervisorType defaultHype = getDefaultHypervisor(zoneId); if (defaultHype == HypervisorType.None) { final List supportedHypes = getSupportedHypervisorTypes(zoneId, false, null); if (!supportedHypes.isEmpty()) { Collections.shuffle(supportedHypes); defaultHype = supportedHypes.get(0); } } if (defaultHype == HypervisorType.None) { defaultHype = HypervisorType.Any; } return defaultHype; } @Override public void registerResourceStateAdapter(final String name, final ResourceStateAdapter adapter) { synchronized (_resourceStateAdapters) { if (_resourceStateAdapters.get(name) != null) { throw new CloudRuntimeException(name + " has registered"); } _resourceStateAdapters.put(name, adapter); } } @Override public void unregisterResourceStateAdapter(final String name) { synchronized (_resourceStateAdapters) { _resourceStateAdapters.remove(name); } } private Object dispatchToStateAdapters(final ResourceStateAdapter.Event event, final boolean singleTaker, final Object... args) { synchronized (_resourceStateAdapters) { final Iterator> it = _resourceStateAdapters.entrySet().iterator(); Object result = null; while (it.hasNext()) { final Map.Entry item = it.next(); final ResourceStateAdapter adapter = item.getValue(); final String msg = "Dispatching resource state event " + event + " to " + item.getKey(); logger.debug(msg); if (event == ResourceStateAdapter.Event.CREATE_HOST_VO_FOR_CONNECTED) { result = adapter.createHostVOForConnectedAgent((HostVO)args[0], (StartupCommand[])args[1]); if (result != null && singleTaker) { break; } } else if (event == ResourceStateAdapter.Event.CREATE_HOST_VO_FOR_DIRECT_CONNECT) { result = adapter.createHostVOForDirectConnectAgent((HostVO)args[0], (StartupCommand[])args[1], (ServerResource)args[2], (Map)args[3], (List)args[4]); if (result != null && singleTaker) { break; } } else if (event == ResourceStateAdapter.Event.DELETE_HOST) { try { result = adapter.deleteHost((HostVO)args[0], (Boolean)args[1], (Boolean)args[2]); if (result != null) { break; } } catch (final UnableDeleteHostException e) { logger.debug("Adapter " + adapter.getName() + " says unable to delete host", e); result = new ResourceStateAdapter.DeleteHostAnswer(false, true); } } else { throw new CloudRuntimeException("Unknown resource state event:" + event); } } return result; } } @Override public void checkCIDR(final HostPodVO pod, final DataCenterVO dc, final String serverPrivateIP, final String serverPrivateNetmask) throws IllegalArgumentException { if (serverPrivateIP == null) { return; } // Get the CIDR address and CIDR size final String cidrAddress = pod.getCidrAddress(); final long cidrSize = pod.getCidrSize(); // If the server's private IP address is not in the same subnet as the // pod's CIDR, return false final String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); final String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); if (!cidrSubnet.equals(serverSubnet)) { logger.warn("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); } // If the server's private 
netmask is less inclusive than the pod's CIDR // netmask, return false final String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); final long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); final long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); if (serverNetmaskNumeric > cidrNetmaskNumeric) { throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " + pod.getName() + " and zone: " + dc.getName()); } } private boolean checkCIDR(final HostPodVO pod, final String serverPrivateIP, final String serverPrivateNetmask) { if (serverPrivateIP == null) { return true; } // Get the CIDR address and CIDR size final String cidrAddress = pod.getCidrAddress(); final long cidrSize = pod.getCidrSize(); // If the server's private IP address is not in the same subnet as the // pod's CIDR, return false final String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); final String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); if (!cidrSubnet.equals(serverSubnet)) { return false; } // If the server's private netmask is less inclusive than the pod's CIDR // netmask, return false final String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); final long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); final long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); return serverNetmaskNumeric <= cidrNetmaskNumeric; } private HostVO getNewHost(StartupCommand[] startupCommands) { StartupCommand startupCommand = startupCommands[0]; HostVO host = findHostByGuid(startupCommand.getGuid()); if (host != null) { return host; } host = findHostByGuid(startupCommand.getGuidWithoutResource()); return host; // even when host == null! } protected HostVO createHostVO(final StartupCommand[] cmds, final ServerResource resource, final Map details, List hostTags, final ResourceStateAdapter.Event stateEvent) { boolean newHost = false; StartupCommand startup = cmds[0]; HostVO host = getNewHost(cmds); if (host == null) { host = new HostVO(startup.getGuid()); newHost = true; } String dataCenter = startup.getDataCenter(); String pod = startup.getPod(); final String cluster = startup.getCluster(); if (pod != null && dataCenter != null && pod.equalsIgnoreCase("default") && dataCenter.equalsIgnoreCase("default")) { final List pods = _podDao.listAllIncludingRemoved(); for (final HostPodVO hpv : pods) { if (checkCIDR(hpv, startup.getPrivateIpAddress(), startup.getPrivateNetmask())) { pod = hpv.getName(); dataCenter = _dcDao.findById(hpv.getDataCenterId()).getName(); break; } } } long dcId; DataCenterVO dc = _dcDao.findByName(dataCenter); if (dc == null) { try { dcId = Long.parseLong(dataCenter != null ? dataCenter : "-1"); dc = _dcDao.findById(dcId); } catch (final NumberFormatException e) { logger.debug("Cannot parse " + dataCenter + " into Long."); } } if (dc == null) { throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect data center: " + dataCenter); } dcId = dc.getId(); HostPodVO p = _podDao.findByName(pod, dcId); if (p == null) { try { final long podId = Long.parseLong(pod != null ? pod : "-1"); p = _podDao.findById(podId); } catch (final NumberFormatException e) { logger.debug("Cannot parse " + pod + " into Long."); } } /* * ResourceStateAdapter is responsible for throwing Exception if Pod is * null and non-null is required. for example, XcpServerDiscoever. 
* Others, like PxeServer, ExternalFireware don't require Pod */ final Long podId = p == null ? null : p.getId(); Long clusterId = null; if (cluster != null) { try { clusterId = Long.valueOf(cluster); } catch (final NumberFormatException e) { if (podId != null) { ClusterVO c = _clusterDao.findBy(cluster, podId); if (c == null) { c = new ClusterVO(dcId, podId, cluster); c = _clusterDao.persist(c); } clusterId = c.getId(); } } } host.setDataCenterId(dc.getId()); host.setPodId(podId); host.setClusterId(clusterId); host.setPrivateIpAddress(startup.getPrivateIpAddress()); host.setPrivateNetmask(startup.getPrivateNetmask()); host.setPrivateMacAddress(startup.getPrivateMacAddress()); host.setPublicIpAddress(startup.getPublicIpAddress()); host.setPublicMacAddress(startup.getPublicMacAddress()); host.setPublicNetmask(startup.getPublicNetmask()); host.setStorageIpAddress(startup.getStorageIpAddress()); host.setStorageMacAddress(startup.getStorageMacAddress()); host.setStorageNetmask(startup.getStorageNetmask()); host.setVersion(startup.getVersion()); host.setName(startup.getName()); host.setManagementServerId(_nodeId); host.setStorageUrl(startup.getIqn()); host.setLastPinged(System.currentTimeMillis() >> 10); host.setHostTags(hostTags, false); host.setDetails(details); host.setArch(CPU.CPUArch.fromType(startup.getArch())); if (startup.getStorageIpAddressDeux() != null) { host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux()); host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux()); host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux()); } if (resource != null) { /* null when agent is connected agent */ host.setResource(resource.getClass().getName()); } host = (HostVO)dispatchToStateAdapters(stateEvent, true, host, cmds, resource, details, hostTags); if (host == null) { throw new CloudRuntimeException("No resource state adapter response"); } if (newHost) { host = _hostDao.persist(host); } else { _hostDao.update(host.getId(), host); } if (startup instanceof StartupRoutingCommand) { final StartupRoutingCommand ssCmd = (StartupRoutingCommand)startup; _hostTagsDao.updateImplicitTags(host.getId(), ssCmd.getHostTags()); updateSupportsClonedVolumes(host, ssCmd.getSupportsClonedVolumes()); } try { resourceStateTransitTo(host, ResourceState.Event.InternalCreated, _nodeId); /* Agent goes to Connecting status */ _agentMgr.agentStatusTransitTo(host, Status.Event.AgentConnected, _nodeId); } catch (final Exception e) { logger.debug(String.format("Cannot transit %s to Creating state", host), e); _agentMgr.agentStatusTransitTo(host, Status.Event.Error, _nodeId); try { resourceStateTransitTo(host, ResourceState.Event.Error, _nodeId); } catch (final NoTransitionException e1) { logger.debug(String.format("Cannot transit %s to Error state", host), e); } } return host; } private void updateSupportsClonedVolumes(HostVO host, boolean supportsClonedVolumes) { final String name = "supportsResign"; DetailVO hostDetail = _hostDetailsDao.findDetail(host.getId(), name); if (hostDetail != null) { if (supportsClonedVolumes) { hostDetail.setValue(Boolean.TRUE.toString()); _hostDetailsDao.update(hostDetail.getId(), hostDetail); } else { _hostDetailsDao.remove(hostDetail.getId()); } } else { if (supportsClonedVolumes) { hostDetail = new DetailVO(host.getId(), name, Boolean.TRUE.toString()); _hostDetailsDao.persist(hostDetail); } } boolean clusterSupportsResigning = true; List hostIds = _hostDao.listIdsByClusterId(host.getClusterId()); for (Long hostId : hostIds) { DetailVO hostDetailVO = 
_hostDetailsDao.findDetail(hostId, name); if (hostDetailVO == null || !Boolean.parseBoolean(hostDetailVO.getValue())) { clusterSupportsResigning = false; break; } } ClusterDetailsVO clusterDetailsVO = _clusterDetailsDao.findDetail(host.getClusterId(), name); if (clusterDetailsVO != null) { if (clusterSupportsResigning) { clusterDetailsVO.setValue(Boolean.TRUE.toString()); _clusterDetailsDao.update(clusterDetailsVO.getId(), clusterDetailsVO); } else { _clusterDetailsDao.remove(clusterDetailsVO.getId()); } } else { if (clusterSupportsResigning) { clusterDetailsVO = new ClusterDetailsVO(host.getClusterId(), name, Boolean.TRUE.toString()); _clusterDetailsDao.persist(clusterDetailsVO); } } } private boolean isFirstHostInCluster(final HostVO host) { boolean isFirstHost = true; if (host.getClusterId() != null) { final SearchBuilder sb = _hostDao.createSearchBuilder(); sb.and("removed", sb.entity().getRemoved(), SearchCriteria.Op.NULL); sb.and("cluster", sb.entity().getClusterId(), SearchCriteria.Op.EQ); sb.done(); final SearchCriteria sc = sb.create(); sc.setParameters("cluster", host.getClusterId()); final List hosts = _hostDao.search(sc, null); if (hosts != null && hosts.size() > 1) { isFirstHost = false; } } return isFirstHost; } private void markHostAsDisconnected(HostVO host, final StartupCommand[] cmds) { if (host == null) { // in case host is null due to some errors, try // reloading the host from db if (cmds != null) { final StartupCommand firstCmd = cmds[0]; host = findHostByGuid(firstCmd.getGuid()); if (host == null) { host = findHostByGuid(firstCmd.getGuidWithoutResource()); } } } if (host != null) { // Change agent status to Alert, so that host is considered for // reconnection next time _agentMgr.agentStatusTransitTo(host, Status.Event.AgentDisconnected, _nodeId); } } private Host createHostAndAgent(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { HostVO host = null; StartupCommand[] cmds = null; boolean hostExists = false; boolean created = false; try { cmds = resource.initialize(); if (cmds == null) { logger.info("Unable to fully initialize the agent because no StartupCommands are returned"); return null; } /* Generate a random version in a dev setup situation */ if (this.getClass().getPackage().getImplementationVersion() == null) { for (final StartupCommand cmd : cmds) { if (cmd.getVersion() == null) { cmd.setVersion(Long.toString(System.currentTimeMillis())); } } } if (logger.isDebugEnabled()) { new Request(-1L, -1L, cmds, true, false).logD("Startup request from directly connected host: ", true); } if (old) { final StartupCommand firstCmd = cmds[0]; host = findHostByGuid(firstCmd.getGuid()); if (host == null) { host = findHostByGuid(firstCmd.getGuidWithoutResource()); } if (host != null && host.getRemoved() == null) { // host already added, no need to add again logger.debug(String.format("Found %s by guid: %s, old host reconnected as new", host, firstCmd.getGuid())); hostExists = true; // ensures that host status is left unchanged in case of adding same one again return null; } } // find out if the host we want to connect to is new (so we can send an event) boolean newHost = getNewHost(cmds) == null; host = createHostVO(cmds, resource, details, hostTags, ResourceStateAdapter.Event.CREATE_HOST_VO_FOR_DIRECT_CONNECT); if (host != null) { created = _agentMgr.handleDirectConnectAgent(host, cmds, resource, forRebalance, newHost); /* reload myself from database */ host = _hostDao.findById(host.getId()); } } catch 
(final Exception e) { logger.warn("Unable to connect due to ", e); } finally { if (hostExists) { if (cmds != null) { resource.disconnected(); } } else { if (!created) { if (cmds != null) { resource.disconnected(); } markHostAsDisconnected(host, cmds); } } } return host; } private Host createHostAndAgentDeferred(final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { HostVO host = null; StartupCommand[] cmds = null; boolean hostExists = false; boolean deferAgentCreation = true; boolean created = false; try { cmds = resource.initialize(); if (cmds == null) { logger.info("Unable to fully initialize the agent because no StartupCommands are returned"); return null; } /* Generate a random version in a dev setup situation */ if (this.getClass().getPackage().getImplementationVersion() == null) { for (final StartupCommand cmd : cmds) { if (cmd.getVersion() == null) { cmd.setVersion(Long.toString(System.currentTimeMillis())); } } } if (logger.isDebugEnabled()) { new Request(-1L, -1L, cmds, true, false).logD("Startup request from directly connected host: ", true); } if (old) { final StartupCommand firstCmd = cmds[0]; host = findHostByGuid(firstCmd.getGuid()); if (host == null) { host = findHostByGuid(firstCmd.getGuidWithoutResource()); } if (host != null && host.getRemoved() == null) { // host already // added, no // need to add // again logger.debug(String.format("Found %s by guid %s, old host reconnected as new.", host, firstCmd.getGuid())); hostExists = true; // ensures that host status is left // unchanged in case of adding same one // again return null; } } host = null; boolean newHost = false; final GlobalLock addHostLock = GlobalLock.getInternLock("AddHostLock"); try { if (addHostLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) { // to safely determine first host in cluster in multi-MS scenario try { // find out if the host we want to connect to is new (so we can send an event) newHost = getNewHost(cmds) == null; host = createHostVO(cmds, resource, details, hostTags, ResourceStateAdapter.Event.CREATE_HOST_VO_FOR_DIRECT_CONNECT); if (host != null) { // if first host in cluster no need to defer agent creation deferAgentCreation = !isFirstHostInCluster(host); } } finally { addHostLock.unlock(); } } } finally { addHostLock.releaseRef(); } if (host != null) { if (!deferAgentCreation) { // if first host in cluster then created = _agentMgr.handleDirectConnectAgent(host, cmds, resource, forRebalance, newHost); host = _hostDao.findById(host.getId()); // reload } else { host = _hostDao.findById(host.getId()); // reload // force host status to 'Alert' so that it is loaded for // connection during next scan task _agentMgr.agentStatusTransitTo(host, Status.Event.AgentDisconnected, _nodeId); host = _hostDao.findById(host.getId()); // reload host.setLastPinged(0); // so that scan task can pick it up _hostDao.update(host.getId(), host); } } } catch (final Exception e) { logger.warn("Unable to connect due to ", e); } finally { if (hostExists) { if (cmds != null) { resource.disconnected(); } } else { if (!deferAgentCreation && !created) { if (cmds != null) { resource.disconnected(); } markHostAsDisconnected(host, cmds); } } } return host; } @Override public Host createHostAndAgent(final Long hostId, final ServerResource resource, final Map details, final boolean old, final List hostTags, final boolean forRebalance) { return createHostAndAgent(resource, details, old, hostTags, forRebalance); } @Override public Host addHost(final long zoneId, 
final ServerResource resource, final Type hostType, final Map hostDetails) { // Check if the zone exists in the system if (_dcDao.findById(zoneId) == null) { throw new InvalidParameterValueException("Can't find zone with id " + zoneId); } final String guid = hostDetails.get("guid"); final List currentHosts = listAllUpAndEnabledHostsInOneZoneByType(hostType, zoneId); for (final HostVO currentHost : currentHosts) { if (currentHost.getGuid().equals(guid)) { return currentHost; } } return createHostAndAgent(resource, hostDetails, true, null, false); } @Override public HostVO createHostVOForConnectedAgent(final StartupCommand[] cmds) { return createHostVO(cmds, null, null, null, ResourceStateAdapter.Event.CREATE_HOST_VO_FOR_CONNECTED); } private void checkIPConflicts(final HostPodVO pod, final DataCenterVO dc, final String serverPrivateIP, final String serverPublicIP) { // If the server's private IP is the same as is public IP, this host has // a host-only private network. Don't check for conflicts with the // private IP address table. if (!ObjectUtils.equals(serverPrivateIP, serverPublicIP)) { if (!_privateIPAddressDao.mark(dc.getId(), pod.getId(), serverPrivateIP)) { // If the server's private IP address is already in the // database, return false final List existingPrivateIPs = _privateIPAddressDao.listByPodIdDcIdIpAddress(pod.getId(), dc.getId(), serverPrivateIP); assert existingPrivateIPs.size() <= 1 : " How can we get more than one ip address with " + serverPrivateIP; if (existingPrivateIPs.size() > 1) { throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " + pod.getName() + " and zone: " + dc.getName()); } if (existingPrivateIPs.size() == 1) { final DataCenterIpAddressVO vo = existingPrivateIPs.get(0); if (vo.getNicId() != null) { throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " + pod.getName() + " and zone: " + dc.getName()); } } } } if (serverPublicIP != null && !_publicIPAddressDao.mark(dc.getId(), new Ip(serverPublicIP))) { // If the server's public IP address is already in the database, // return false final List existingPublicIPs = _publicIPAddressDao.listByDcIdIpAddress(dc.getId(), serverPublicIP); if (!existingPublicIPs.isEmpty()) { throw new IllegalArgumentException("The public ip address of the server (" + serverPublicIP + ") is already in use in zone: " + dc.getName()); } } } @Override public HostVO fillRoutingHostVO(final HostVO host, final StartupRoutingCommand ssCmd, final HypervisorType hyType, Map details, final List hostTags) { if (host.getPodId() == null) { logger.error("Host " + ssCmd.getPrivateIpAddress() + " sent incorrect pod, pod id is null"); throw new IllegalArgumentException("Host " + ssCmd.getPrivateIpAddress() + " sent incorrect pod, pod id is null"); } final ClusterVO clusterVO = _clusterDao.findById(host.getClusterId()); if (clusterVO.getHypervisorType() != hyType) { throw new IllegalArgumentException(String.format("Can't add host whose hypervisor type is: %s into cluster: %s whose hypervisor type is: %s", hyType, clusterVO, clusterVO.getHypervisorType())); } CPU.CPUArch hostCpuArch = CPU.CPUArch.fromType(ssCmd.getCpuArch()); if (hostCpuArch != null && clusterVO.getArch() != null && hostCpuArch != clusterVO.getArch()) { String msg = String.format("Can't add a host whose arch is: %s into cluster of arch type: %s", hostCpuArch.getType(), clusterVO.getArch().getType()); logger.error(msg); throw new 
IllegalArgumentException(msg); } final Map hostDetails = ssCmd.getHostDetails(); if (hostDetails != null) { if (details != null) { details.putAll(hostDetails); } else { details = hostDetails; } } final HostPodVO pod = _podDao.findById(host.getPodId()); final DataCenterVO dc = _dcDao.findById(host.getDataCenterId()); checkIPConflicts(pod, dc, ssCmd.getPrivateIpAddress(), ssCmd.getPublicIpAddress()); host.setType(com.cloud.host.Host.Type.Routing); host.setDetails(details); host.setCaps(ssCmd.getCapabilities()); host.setCpuSockets(ssCmd.getCpuSockets()); host.setCpus(ssCmd.getCpus()); host.setArch(hostCpuArch); host.setTotalMemory(ssCmd.getMemory()); host.setSpeed(ssCmd.getSpeed()); host.setHypervisorType(hyType); host.setHypervisorVersion(ssCmd.getHypervisorVersion()); host.setGpuGroups(ssCmd.getGpuGroupDetails()); return host; } @Override public void deleteRoutingHost(final HostVO host, final boolean isForced, final boolean forceDestroyStorage) throws UnableDeleteHostException { if (host.getType() != Host.Type.Routing) { throw new CloudRuntimeException(String.format("Non-Routing host (%s) gets in deleteRoutingHost", host)); } if (logger.isDebugEnabled()) { logger.debug(String.format("Deleting %s", host)); } final StoragePoolVO storagePool = _storageMgr.findLocalStorageOnHost(host.getId()); if (forceDestroyStorage && storagePool != null) { // put local storage into maintenance mode, will set all the VMs on // this local storage into stopped state if (storagePool.getStatus() == StoragePoolStatus.Up || storagePool.getStatus() == StoragePoolStatus.ErrorInMaintenance) { try { final StoragePool pool = _storageSvr.preparePrimaryStorageForMaintenance(storagePool.getId()); if (pool == null) { logger.debug("Failed to set primary storage into maintenance mode"); throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode"); } } catch (final Exception e) { logger.debug("Failed to set primary storage into maintenance mode", e); throw new UnableDeleteHostException("Failed to set primary storage into maintenance mode, due to: " + e); } } final List vmsOnLocalStorage = _storageMgr.listByStoragePool(storagePool.getId()); for (final VMInstanceVO vm : vmsOnLocalStorage) { try { _vmMgr.destroy(vm.getUuid(), false); } catch (final Exception e) { String errorMsg = String.format("There was an error when destroying %s as a part of hostDelete for %s", vm, host); logger.debug(errorMsg, e); throw new UnableDeleteHostException(errorMsg + "," + e.getMessage()); } } } else { // Check if there are vms running/starting/stopping on this host final List vms = _vmDao.listByHostId(host.getId()); if (!vms.isEmpty()) { if (isForced) { // Stop HA disabled vms and HA enabled vms in Stopping state // Restart HA enabled vms try { resourceStateTransitTo(host, ResourceState.Event.DeleteHost, host.getId()); } catch (final NoTransitionException e) { logger.debug("Cannot transmit host {} to Disabled state", host, e); } for (final VMInstanceVO vm : vms) { if ((!HighAvailabilityManager.ForceHA.value() && !vm.isHaEnabled()) || vm.getState() == State.Stopping) { logger.debug(String.format("Stopping %s as a part of hostDelete for %s",vm, host)); try { _haMgr.scheduleStop(vm, host.getId(), WorkType.Stop); } catch (final Exception e) { final String errorMsg = String.format("There was an error stopping the %s as a part of hostDelete for %s", vm, host); logger.debug(errorMsg, e); throw new UnableDeleteHostException(errorMsg + "," + e.getMessage()); } } else if ((HighAvailabilityManager.ForceHA.value() || 
vm.isHaEnabled()) && (vm.getState() == State.Running || vm.getState() == State.Starting)) { logger.debug(String.format("Scheduling restart for %s, state: %s on host: %s.", vm, vm.getState(), host)); _haMgr.scheduleRestart(vm, false); } } } else { throw new UnableDeleteHostException("Unable to delete the host as there are VMs in " + vms.get(0).getState() + " state using this host and isForced=false was specified"); } } } } private boolean doCancelMaintenance(final long hostId) { HostVO host; host = _hostDao.findById(hostId); if (host == null || host.getRemoved() != null) { logger.warn("Unable to find host " + hostId); return true; } /* * TODO: think twice about returning true or throwing an exception; I * really prefer an exception that always exposes bugs */ if (!ResourceState.isMaintenanceState(host.getResourceState())) { throw new CloudRuntimeException(String.format("Cannot perform cancelMaintenance when resource state is %s, host = %s", host.getResourceState(), host)); } /* TODO: move to listener */ _haMgr.cancelScheduledMigrations(host); boolean vmsMigrating = false; final List<VMInstanceVO> vms = _haMgr.findTakenMigrationWork(); for (final VMInstanceVO vm : vms) { if (vm.getHostId() != null && vm.getHostId() == hostId) { logger.warn("Unable to cancel migration because the vm is being migrated: {}, host {}", vm, host); vmsMigrating = true; } } handleAgentIfNotConnected(host, vmsMigrating); try { resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId); _agentMgr.pullAgentOutMaintenance(hostId); } catch (final NoTransitionException e) { logger.debug(String.format("Cannot transit %s to Enabled state", host), e); return false; } return true; } /** * Handles the agent (if available) if it is not connected before cancelling maintenance. * The agent must be connected before cancelling maintenance. * If the host status is not Up: * - If kvm.ssh.to.agent is true, then SSH into the host and restart the agent. * - If kvm.ssh.to.agent is false, then fail cancelling maintenance. */ protected void handleAgentIfNotConnected(HostVO host, boolean vmsMigrating) { final boolean isAgentOnHost = host.getHypervisorType() == HypervisorType.KVM || host.getHypervisorType() == HypervisorType.LXC; if (!isAgentOnHost || vmsMigrating || host.getStatus() == Status.Up) { return; } final boolean sshToAgent = Boolean.parseBoolean(_configDao.getValue(KvmSshToAgentEnabled.key())); if (sshToAgent) { Ternary<String, String, String> credentials = getHostCredentials(host); connectAndRestartAgentOnHost(host, credentials.first(), credentials.second(), credentials.third()); } else { throw new CloudRuntimeException("SSH access is disabled, cannot cancel maintenance mode as " + "host agent is not connected"); } } /** * Gets the host credentials. * @throws CloudRuntimeException if the username or password are not found */ protected Ternary<String, String, String> getHostCredentials(HostVO host) { _hostDao.loadDetails(host); final String password = host.getDetail("password"); final String username = host.getDetail("username"); final String privateKey = _configDao.getValue("ssh.privatekey"); if ((password == null && privateKey == null) || username == null) { throw new CloudRuntimeException("SSH to agent is enabled, but username and password or private key are not found"); } return new Ternary<>(username, password, privateKey); } /** * Restarts the agent on the host via SSH. 
Assumes kvm.ssh.to.agent = true and host status is not Up */ protected void connectAndRestartAgentOnHost(HostVO host, String username, String password, String privateKey) { final com.trilead.ssh2.Connection connection = SSHCmdHelper.acquireAuthorizedConnection( host.getPrivateIpAddress(), 22, username, password, privateKey); if (connection == null) { throw new CloudRuntimeException(String.format("SSH to agent is enabled, but failed to connect to %s via IP address [%s].", host, host.getPrivateIpAddress())); } try { SSHCmdHelper.SSHCmdResult result = SSHCmdHelper.sshExecuteCmdOneShot( connection, "service cloudstack-agent restart"); if (result.getReturnCode() != 0) { throw new CloudRuntimeException(String.format("Could not restart agent on %s due to: %s", host, result.getStdErr())); } logger.debug("cloudstack-agent restart result: {}", result); } catch (final SshException e) { throw new CloudRuntimeException("SSH to agent is enabled, but agent restart failed", e); } } public boolean cancelMaintenance(final long hostId) { try { final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminCancelMaintenance); if (result != null) { return result; } } catch (final AgentUnavailableException e) { return false; } return doCancelMaintenance(hostId); } @Override public boolean executeUserRequest(final long hostId, final ResourceState.Event event) { if (event == ResourceState.Event.AdminAskMaintenance) { return doMaintain(hostId); } else if (event == ResourceState.Event.AdminCancelMaintenance) { return doCancelMaintenance(hostId); } else if (event == ResourceState.Event.DeleteHost) { return doDeleteHost(hostId, false, false); } else if (event == ResourceState.Event.Unmanaged) { return doUmanageHost(hostId); } else if (event == ResourceState.Event.UpdatePassword) { return doUpdateHostPassword(hostId); } else { throw new CloudRuntimeException("Received an resource event we are not handling now, " + event); } } private boolean doUmanageHost(final long hostId) { final HostVO host = _hostDao.findById(hostId); if (host == null) { logger.debug("Cannot find host " + hostId + ", assuming it has been deleted, skip umanage"); return true; } if (host.getHypervisorType() == HypervisorType.KVM || host.getHypervisorType() == HypervisorType.LXC) { _agentMgr.easySend(hostId, new MaintainCommand()); } _agentMgr.disconnectWithoutInvestigation(hostId, Event.ShutdownRequested); return true; } @Override public boolean umanageHost(final long hostId) { try { final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.Unmanaged); if (result != null) { return result; } } catch (final AgentUnavailableException e) { return false; } return doUmanageHost(hostId); } private boolean doUpdateHostPassword(final long hostId) { if (!_agentMgr.isAgentAttached(hostId)) { return false; } DetailVO nv = _hostDetailsDao.findDetail(hostId, ApiConstants.USERNAME); final String username = nv.getValue(); nv = _hostDetailsDao.findDetail(hostId, ApiConstants.PASSWORD); final String password = nv.getValue(); final HostVO host = _hostDao.findById(hostId); final String hostIpAddress = host.getPrivateIpAddress(); final UpdateHostPasswordCommand cmd = new UpdateHostPasswordCommand(username, password, hostIpAddress); final Answer answer = _agentMgr.easySend(hostId, cmd); logger.info("Result returned from update host password ==> " + answer.getDetails()); return answer.getResult(); } @Override public boolean updateClusterPassword(final UpdateHostPasswordCmd command) { final boolean shouldUpdateHostPasswd = 
command.getUpdatePasswdOnHost(); // get agents for the cluster final List hostIds = _hostDao.listIdsByClusterId(command.getClusterId()); for (final Long hostId : hostIds) { try { final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.UpdatePassword); if (result != null) { return result; } } catch (final AgentUnavailableException e) { logger.error("Agent is not available!", e); } if (shouldUpdateHostPasswd) { final boolean isUpdated = doUpdateHostPassword(hostId); if (!isUpdated) { HostVO host = _hostDao.findById(hostId); throw new CloudRuntimeException( String.format("CloudStack failed to update the password of %s. Please make sure you are still able to connect to your hosts.", host)); } } } return true; } @Override public boolean updateHostPassword(final UpdateHostPasswordCmd command) { // update agent attache password try { final Boolean result = propagateResourceEvent(command.getHostId(), ResourceState.Event.UpdatePassword); if (result != null) { return result; } } catch (final AgentUnavailableException e) { logger.error("Agent is not available!", e); } final boolean shouldUpdateHostPasswd = command.getUpdatePasswdOnHost(); // If shouldUpdateHostPasswd has been set to false, the method doUpdateHostPassword() won't be called. return shouldUpdateHostPasswd && doUpdateHostPassword(command.getHostId()); } public String getPeerName(final long agentHostId) { final HostVO host = _hostDao.findById(agentHostId); if (host != null && host.getManagementServerId() != null) { if (_clusterMgr.getSelfPeerName().equals(Long.toString(host.getManagementServerId()))) { return null; } return Long.toString(host.getManagementServerId()); } return null; } public Boolean propagateResourceEvent(final long agentId, final ResourceState.Event event) throws AgentUnavailableException { final String msPeer = getPeerName(agentId); if (msPeer == null) { return null; } logger.debug("Propagating resource request event:" + event.toString() + " to agent:" + agentId); final Command[] cmds = new Command[1]; cmds[0] = new PropagateResourceEventCommand(agentId, event); final String AnsStr = _clusterMgr.execute(msPeer, agentId, _gson.toJson(cmds), true); if (AnsStr == null) { throw new AgentUnavailableException(agentId); } final Answer[] answers = _gson.fromJson(AnsStr, Answer[].class); if (logger.isDebugEnabled()) { logger.debug("Result for agent change is " + answers[0].getResult()); } return answers[0].getResult(); } @Override public boolean migrateAwayFailed(final long hostId, final long vmId) { final HostVO host = _hostDao.findById(hostId); if (host == null) { if (logger.isDebugEnabled()) { logger.debug("Cant not find host " + hostId); } return false; } else { try { logger.warn("Migration of VM {} failed from host {}. 
    @Override
    public boolean migrateAwayFailed(final long hostId, final long vmId) {
        final HostVO host = _hostDao.findById(hostId);
        if (host == null) {
            if (logger.isDebugEnabled()) {
                logger.debug("Cannot find host " + hostId);
            }
            return false;
        } else {
            try {
                logger.warn("Migration of VM {} failed from host {}. Emitting event UnableToMigrate.", _vmDao.findById(vmId), host);
                return resourceStateTransitTo(host, ResourceState.Event.UnableToMigrate, _nodeId);
            } catch (final NoTransitionException e) {
                logger.debug(String.format("No next resource state for %s while current state is [%s] with event %s", host, host.getResourceState(), ResourceState.Event.UnableToMigrate), e);
                return false;
            }
        }
    }

    @Override
    public List<HostVO> findDirectlyConnectedHosts() {
        /* The resource column is not null for direct connected resource */
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getResource(), Op.NNULL);
        sc.and(sc.entity().getResourceState(), Op.NIN, ResourceState.Disabled);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllUpAndEnabledHosts(final Type type, final Long clusterId, final Long podId, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        if (type != null) {
            sc.and(sc.entity().getType(), Op.EQ, type);
        }
        if (clusterId != null) {
            sc.and(sc.entity().getClusterId(), Op.EQ, clusterId);
        }
        if (podId != null) {
            sc.and(sc.entity().getPodId(), Op.EQ, podId);
        }
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        sc.and(sc.entity().getResourceState(), Op.EQ, ResourceState.Enabled);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllHosts(final Type type, final Long clusterId, final Long podId, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        if (type != null) {
            sc.and(sc.entity().getType(), Op.EQ, type);
        }
        if (clusterId != null) {
            sc.and(sc.entity().getClusterId(), Op.EQ, clusterId);
        }
        if (podId != null) {
            sc.and(sc.entity().getPodId(), Op.EQ, podId);
        }
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllUpHosts(Type type, Long clusterId, Long podId, long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        if (type != null) {
            sc.and(sc.entity().getType(), Op.EQ, type);
        }
        if (clusterId != null) {
            sc.and(sc.entity().getClusterId(), Op.EQ, clusterId);
        }
        if (podId != null) {
            sc.and(sc.entity().getPodId(), Op.EQ, podId);
        }
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllUpAndEnabledNonHAHosts(final Type type, final Long clusterId, final Long podId, final long dcId) {
        final String haTag = _haMgr.getHaTag();
        return _hostDao.listAllUpAndEnabledNonHAHosts(type, clusterId, podId, dcId, haTag);
    }

    @Override
    public List<HostVO> findHostByGuid(final long dcId, final String guid) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getGuid(), Op.EQ, guid);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllHostsInCluster(final long clusterId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getClusterId(), Op.EQ, clusterId);
        return sc.list();
    }

    @Override
    public List<HostVO> listHostsInClusterByStatus(final long clusterId, final Status status) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getClusterId(), Op.EQ, clusterId);
        sc.and(sc.entity().getStatus(), Op.EQ, status);
        return sc.list();
    }
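    /*
     * The helpers below list hosts scoped to a single zone; each builds a
     * QueryBuilder over HostVO and filters on type, data center, status and
     * resource state as its name indicates.
     */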
    @Override
    public List<HostVO> listAllUpAndEnabledHostsInOneZoneByType(final Type type, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getType(), Op.EQ, type);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        sc.and(sc.entity().getResourceState(), Op.EQ, ResourceState.Enabled);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllNotInMaintenanceHostsInOneZone(final Type type, final Long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        if (dcId != null) {
            sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        }
        sc.and(sc.entity().getType(), Op.EQ, type);
        sc.and(sc.entity().getResourceState(), Op.NIN, ResourceState.Maintenance, ResourceState.ErrorInMaintenance,
                ResourceState.ErrorInPrepareForMaintenance, ResourceState.PrepareForMaintenance, ResourceState.Error);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllHostsInOneZoneByType(final Type type, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getType(), Op.EQ, type);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllHostsInAllZonesByType(final Type type) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getType(), Op.EQ, type);
        return sc.list();
    }

    @Override
    public List<HypervisorType> listAvailHypervisorInZone(final Long zoneId) {
        final SearchCriteria<String> sc = _hypervisorsInDC.create();
        if (zoneId != null) {
            sc.setParameters("dataCenter", zoneId);
        }
        sc.setParameters("type", Host.Type.Routing);
        return _hostDao.customSearch(sc, null).stream()
                // The search is not able to return list of enums, so getting
                // list of hypervisors as strings and then converting them to enum
                .map(HypervisorType::getType).collect(Collectors.toList());
    }

    @Override
    public HostVO findHostByGuid(final String guid) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getGuid(), Op.EQ, guid);
        return sc.find();
    }

    @Override
    public HostVO findHostByName(final String name) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getName(), Op.EQ, name);
        return sc.find();
    }

    @Override
    public HostStats getHostStatistics(final Host host) {
        final Answer answer = _agentMgr.easySend(host.getId(), new GetHostStatsCommand(host.getGuid(), host.getName(), host.getId()));

        if (answer instanceof UnsupportedAnswer) {
            return null;
        }

        if (answer == null || !answer.getResult()) {
            logger.warn("Unable to obtain {} statistics.", host);
            return null;
        } else {
            // now construct the result object
            if (answer instanceof GetHostStatsAnswer) {
                return ((GetHostStatsAnswer)answer).getHostStats();
            }
        }
        return null;
    }

    @Override
    public Long getGuestOSCategoryId(final long hostId) {
        final HostVO host = _hostDao.findById(hostId);
        if (host == null) {
            return null;
        } else {
            _hostDao.loadDetails(host);
            final DetailVO detail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id");
            if (detail == null) {
                return null;
            } else {
                return Long.parseLong(detail.getValue());
            }
        }
    }

    @Override
    public String getHostTags(final long hostId) {
        final List<String> hostTags = _hostTagsDao.getHostTags(hostId).parallelStream().map(HostTagVO::getTag).collect(Collectors.toList());
        return StringUtils.listToCsvTags(hostTags);
    }

    @Override
    public List<PodCluster> listByDataCenter(final long dcId) {
        final List<HostPodVO> pods = _podDao.listByDataCenterId(dcId);
        final ArrayList<PodCluster> pcs = new ArrayList<>();
        for (final HostPodVO pod : pods) {
            final List<ClusterVO> clusters = _clusterDao.listByPodId(pod.getId());
            if (clusters.isEmpty()) {
                pcs.add(new PodCluster(pod, null));
            } else {
                for (final ClusterVO cluster : clusters) {
                    pcs.add(new PodCluster(pod, cluster));
                }
            }
        }
        return pcs;
    }
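    /*
     * Hypervisor-scoped host listings and GPU capacity lookups. The GPU queries
     * below use the pre-built _gpuAvailability search criteria, joining host GPU
     * groups against the requested vGPU type and its remaining capacity.
     */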
    @Override
    public List<HostVO> listAllUpAndEnabledHostsInOneZoneByHypervisor(final HypervisorType type, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getHypervisorType(), Op.EQ, type);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        sc.and(sc.entity().getResourceState(), Op.EQ, ResourceState.Enabled);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllUpHostsInOneZoneByHypervisor(final HypervisorType type, final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getHypervisorType(), Op.EQ, type);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllUpAndEnabledHostsInOneZone(final long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        sc.and(sc.entity().getResourceState(), Op.EQ, ResourceState.Enabled);
        return sc.list();
    }

    @Override
    public boolean isHostGpuEnabled(final long hostId) {
        final SearchCriteria<HostGpuGroupsVO> sc = _gpuAvailability.create();
        sc.setParameters("hostId", hostId);
        return !_hostGpuGroupsDao.customSearch(sc, null).isEmpty();
    }

    @Override
    public List<HostGpuGroupsVO> listAvailableGPUDevice(final long hostId, final String groupName, final String vgpuType) {
        Filter searchFilter = new Filter(null, null);
        searchFilter.addOrderBy(VGPUTypesVO.class, "remainingCapacity", false, "groupId");
        final SearchCriteria<HostGpuGroupsVO> sc = _gpuAvailability.create();
        sc.setParameters("hostId", hostId);
        sc.setParameters("groupName", groupName);
        sc.setJoinParameters("groupId", "vgpuType", vgpuType);
        sc.setJoinParameters("groupId", "remainingCapacity", 0);
        return _hostGpuGroupsDao.customSearch(sc, searchFilter);
    }

    @Override
    public List<HostVO> listAllHostsInOneZoneNotInClusterByHypervisor(final HypervisorType type, final long dcId, final long clusterId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getHypervisorType(), Op.EQ, type);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getClusterId(), Op.NEQ, clusterId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        return sc.list();
    }

    @Override
    public List<HostVO> listAllHostsInOneZoneNotInClusterByHypervisors(List<HypervisorType> types, final long dcId, final long clusterId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getHypervisorType(), Op.IN, types);
        sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        sc.and(sc.entity().getClusterId(), Op.NEQ, clusterId);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        return sc.list();
    }

    @Override
    public boolean isGPUDeviceAvailable(final Host host, final String groupName, final String vgpuType) {
        if (!listAvailableGPUDevice(host.getId(), groupName, vgpuType).isEmpty()) {
            return true;
        } else {
            if (logger.isDebugEnabled()) {
                logger.debug("Host: {} does not have GPU device available", host);
            }
            return false;
        }
    }
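    /**
     * Picks the first GPU group on the host that still has capacity for the
     * requested vGPU type and wraps it in a GPUDeviceTO; throws a
     * CloudRuntimeException when no suitable device is available.
     */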
    @Override
    public GPUDeviceTO getGPUDevice(final long hostId, final String groupName, final String vgpuType) {
        final List<HostGpuGroupsVO> gpuDeviceList = listAvailableGPUDevice(hostId, groupName, vgpuType);

        if (CollectionUtils.isEmpty(gpuDeviceList)) {
            final String errorMsg = String.format("Host %s does not have the required GPU device or is out of capacity. GPU group: %s, vGPU type: %s",
                    _hostDao.findById(hostId), groupName, vgpuType);
            logger.error(errorMsg);
            throw new CloudRuntimeException(errorMsg);
        }

        return new GPUDeviceTO(gpuDeviceList.get(0).getGroupName(), vgpuType, null);
    }

    @Override
    public void updateGPUDetails(final long hostId, final HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails) {
        // Update GPU group capacity
        final TransactionLegacy txn = TransactionLegacy.currentTxn();
        txn.start();
        _hostGpuGroupsDao.persist(hostId, new ArrayList<>(groupDetails.keySet()));
        _vgpuTypesDao.persist(hostId, groupDetails);
        txn.commit();
    }

    @Override
    public HashMap<String, HashMap<String, VgpuTypesInfo>> getGPUStatistics(final HostVO host) {
        final Answer answer = _agentMgr.easySend(host.getId(), new GetGPUStatsCommand(host.getGuid(), host.getName()));
        if (answer instanceof UnsupportedAnswer) {
            return null;
        }
        if (answer == null || !answer.getResult()) {
            final String msg = String.format("Unable to obtain GPU stats for %s", host);
            logger.warn(msg);
            return null;
        } else {
            // now construct the result object
            if (answer instanceof GetGPUStatsAnswer) {
                return ((GetGPUStatsAnswer)answer).getGroupDetails();
            }
        }
        return null;
    }

    @Override
    public HostVO findOneRandomRunningHostByHypervisor(final HypervisorType type, final Long dcId) {
        final QueryBuilder<HostVO> sc = QueryBuilder.create(HostVO.class);
        sc.and(sc.entity().getHypervisorType(), Op.EQ, type);
        sc.and(sc.entity().getType(), Op.EQ, Type.Routing);
        sc.and(sc.entity().getStatus(), Op.EQ, Status.Up);
        sc.and(sc.entity().getResourceState(), Op.EQ, ResourceState.Enabled);
        if (dcId != null) {
            sc.and(sc.entity().getDataCenterId(), Op.EQ, dcId);
        }
        sc.and(sc.entity().getRemoved(), Op.NULL);
        List<HostVO> hosts = sc.list();
        if (CollectionUtils.isEmpty(hosts)) {
            return null;
        } else {
            Collections.shuffle(hosts, new Random(System.currentTimeMillis()));
            return hosts.get(0);
        }
    }

    @Override
    @DB
    @ActionEvent(eventType = EventTypes.EVENT_HOST_RESERVATION_RELEASE, eventDescription = "releasing host reservation", async = true)
    public boolean releaseHostReservation(final Long hostId) {
        try {
            return Transaction.execute(new TransactionCallback<>() {
                @Override
                public Boolean doInTransaction(final TransactionStatus status) {
                    final PlannerHostReservationVO reservationEntry = _plannerHostReserveDao.findByHostId(hostId);
                    if (reservationEntry != null) {
                        final long id = reservationEntry.getId();
                        final PlannerHostReservationVO hostReservation = _plannerHostReserveDao.lockRow(id, true);
                        if (hostReservation == null) {
                            if (logger.isDebugEnabled()) {
                                logger.debug("Host reservation for host: {} does not even exist. Release reservation call is ignored.", () -> _hostDao.findById(hostId));
                            }
                            return false;
                        }
                        hostReservation.setResourceUsage(null);
                        _plannerHostReserveDao.persist(hostReservation);
                        return true;
                    }

                    if (logger.isDebugEnabled()) {
                        logger.debug("Host reservation for host: {} does not even exist. Release reservation call is ignored.", () -> _hostDao.findById(hostId));
                    }
                    return false;
                }
            });
        } catch (final CloudRuntimeException e) {
            throw e;
        } catch (final Throwable t) {
            logger.error("Unable to release host reservation for host: {}", _hostDao.findById(hostId), t);
            return false;
        }
    }

    @Override
    public String getConfigComponentName() {
        return ResourceManagerImpl.class.getSimpleName();
    }

    @Override
    public ConfigKey<?>[] getConfigKeys() {
        return new ConfigKey<?>[] {KvmSshToAgentEnabled, HOST_MAINTENANCE_LOCAL_STRATEGY, SystemVmPreferredArchitecture};
    }
}