diff --git a/.travis.yml b/.travis.yml index b25becb6e05..ad4154a5289 100644 --- a/.travis.yml +++ b/.travis.yml @@ -44,6 +44,10 @@ env: smoke/test_dynamicroles smoke/test_global_settings smoke/test_guest_vlan_range + smoke/test_ha_for_host + smoke/test_ha_kvm_agent + smoke/test_ha_kvm + smoke/test_hostha_simulator smoke/test_hosts smoke/test_internal_lb smoke/test_iso @@ -52,6 +56,7 @@ env: smoke/test_login smoke/test_metrics_api smoke/test_multipleips_per_nic + smoke/test_nested_virtualization smoke/test_network smoke/test_network_acl smoke/test_nic diff --git a/api/src/com/cloud/dc/DataCenter.java b/api/src/com/cloud/dc/DataCenter.java index 5b3d3c01f30..7d434c5f231 100644 --- a/api/src/com/cloud/dc/DataCenter.java +++ b/api/src/com/cloud/dc/DataCenter.java @@ -18,15 +18,14 @@ package com.cloud.dc; import com.cloud.org.Grouping; import org.apache.cloudstack.acl.InfrastructureEntity; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; +import org.apache.cloudstack.kernel.Partition; import java.util.Map; /** * */ -public interface DataCenter extends InfrastructureEntity, Grouping, Identity, InternalIdentity { +public interface DataCenter extends InfrastructureEntity, Grouping, Partition { public enum NetworkType { Basic, Advanced, diff --git a/api/src/com/cloud/event/EventTypes.java b/api/src/com/cloud/event/EventTypes.java index 66bfbfe6d28..f63ffdf1219 100644 --- a/api/src/com/cloud/event/EventTypes.java +++ b/api/src/com/cloud/event/EventTypes.java @@ -22,6 +22,7 @@ import java.util.Map; import org.apache.cloudstack.acl.Role; import org.apache.cloudstack.acl.RolePermission; import org.apache.cloudstack.config.Configuration; +import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.usage.Usage; import com.cloud.dc.DataCenter; @@ -323,6 +324,12 @@ public class EventTypes { public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD = "HOST.OOBM.CHANGEPASSWORD"; public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION = "HOST.OOBM.POWERSTATE.TRANSITION"; + // HA + public static final String EVENT_HA_RESOURCE_ENABLE = "HA.RESOURCE.ENABLE"; + public static final String EVENT_HA_RESOURCE_DISABLE = "HA.RESOURCE.DISABLE"; + public static final String EVENT_HA_RESOURCE_CONFIGURE = "HA.RESOURCE.CONFIGURE"; + public static final String EVENT_HA_STATE_TRANSITION = "HA.STATE.TRANSITION"; + // Maintenance public static final String EVENT_MAINTENANCE_CANCEL = "MAINT.CANCEL"; public static final String EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE = "MAINT.CANCEL.PS"; @@ -779,6 +786,12 @@ public class EventTypes { entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD, Host.class); entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, Host.class); + // HA + entityEventDetails.put(EVENT_HA_RESOURCE_ENABLE, HAConfig.class); + entityEventDetails.put(EVENT_HA_RESOURCE_DISABLE, HAConfig.class); + entityEventDetails.put(EVENT_HA_RESOURCE_CONFIGURE, HAConfig.class); + entityEventDetails.put(EVENT_HA_STATE_TRANSITION, HAConfig.class); + // Maintenance entityEventDetails.put(EVENT_MAINTENANCE_CANCEL, Host.class); entityEventDetails.put(EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE, Host.class); diff --git a/api/src/com/cloud/host/Host.java b/api/src/com/cloud/host/Host.java index 3ed4f5e0ce9..1ecd48d74ce 100644 --- a/api/src/com/cloud/host/Host.java +++ b/api/src/com/cloud/host/Host.java @@ -16,19 +16,19 @@ // under the License. 
package com.cloud.host; -import java.util.Date; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.resource.ResourceState; import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.kernel.Partition; + +import java.util.Date; /** * Host represents one particular host server. */ -public interface Host extends StateObject, Identity, InternalIdentity { +public interface Host extends StateObject, Identity, Partition, HAResource { public enum Type { Storage(false), Routing(false), SecondaryStorage(false), SecondaryStorageCmdExecutor(false), ConsoleProxy(true), ExternalFirewall(false), ExternalLoadBalancer( false), ExternalVirtualSwitchSupervisor(false), PxeServer(false), BaremetalPxe(false), BaremetalDhcp(false), TrafficMonitor(false), NetScalerControlCenter(false), @@ -202,5 +202,7 @@ public interface Host extends StateObject, Identity, InternalIdentity { boolean isInMaintenanceStates(); + boolean isDisabled(); + ResourceState getResourceState(); } diff --git a/api/src/com/cloud/host/Status.java b/api/src/com/cloud/host/Status.java index 73e6cc9185a..e381115db41 100644 --- a/api/src/com/cloud/host/Status.java +++ b/api/src/com/cloud/host/Status.java @@ -150,12 +150,14 @@ public enum Status { s_fsm.addTransition(Status.Down, Event.ManagementServerDown, Status.Down); s_fsm.addTransition(Status.Down, Event.AgentDisconnected, Status.Down); s_fsm.addTransition(Status.Down, Event.PingTimeout, Status.Down); + s_fsm.addTransition(Status.Down, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Alert, Event.AgentConnected, Status.Connecting); s_fsm.addTransition(Status.Alert, Event.Ping, Status.Up); s_fsm.addTransition(Status.Alert, Event.Remove, Status.Removed); s_fsm.addTransition(Status.Alert, Event.ManagementServerDown, Status.Alert); s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert); s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected); + s_fsm.addTransition(Status.Alert, Event.HostDown, Status.Down); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected); s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting); s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected); diff --git a/api/src/com/cloud/org/Cluster.java b/api/src/com/cloud/org/Cluster.java index 90fcb5729e4..4079c88dfde 100644 --- a/api/src/com/cloud/org/Cluster.java +++ b/api/src/com/cloud/org/Cluster.java @@ -16,13 +16,11 @@ // under the License. 
package com.cloud.org; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.org.Managed.ManagedState; +import org.apache.cloudstack.kernel.Partition; -public interface Cluster extends Grouping, InternalIdentity, Identity { +public interface Cluster extends Grouping, Partition { public static enum ClusterType { CloudManaged, ExternalManaged; }; diff --git a/api/src/com/cloud/resource/ResourceState.java b/api/src/com/cloud/resource/ResourceState.java index 5d2c962f989..d952afa0b7d 100644 --- a/api/src/com/cloud/resource/ResourceState.java +++ b/api/src/com/cloud/resource/ResourceState.java @@ -93,6 +93,7 @@ public enum ResourceState { s_fsm.addTransition(ResourceState.Enabled, Event.InternalCreated, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Enabled, Event.Disable, ResourceState.Disabled); s_fsm.addTransition(ResourceState.Enabled, Event.AdminAskMaintenace, ResourceState.PrepareForMaintenance); + s_fsm.addTransition(ResourceState.Enabled, Event.InternalEnterMaintenance, ResourceState.Maintenance); s_fsm.addTransition(ResourceState.Disabled, Event.Enable, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Disabled, Event.Disable, ResourceState.Disabled); s_fsm.addTransition(ResourceState.Disabled, Event.InternalCreated, ResourceState.Disabled); @@ -109,5 +110,7 @@ public enum ResourceState { s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.InternalEnterMaintenance, ResourceState.Maintenance); s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.AdminCancelMaintenance, ResourceState.Enabled); s_fsm.addTransition(ResourceState.Error, Event.InternalCreated, ResourceState.Error); + s_fsm.addTransition(ResourceState.Disabled, Event.DeleteHost, ResourceState.Disabled); + } } diff --git a/api/src/com/cloud/vm/VirtualMachine.java b/api/src/com/cloud/vm/VirtualMachine.java index c70197a6c81..a46edd78f44 100644 --- a/api/src/com/cloud/vm/VirtualMachine.java +++ b/api/src/com/cloud/vm/VirtualMachine.java @@ -16,26 +16,24 @@ // under the License. 
package com.cloud.vm; -import java.util.Arrays; -import java.util.Date; -import java.util.Map; - -import org.apache.cloudstack.acl.ControlledEntity; -import org.apache.cloudstack.api.Displayable; -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.api.InternalIdentity; - import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.utils.fsm.StateMachine2; import com.cloud.utils.fsm.StateMachine2.Transition; import com.cloud.utils.fsm.StateMachine2.Transition.Impact; import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.acl.ControlledEntity; +import org.apache.cloudstack.api.Displayable; +import org.apache.cloudstack.kernel.Partition; + +import java.util.Arrays; +import java.util.Date; +import java.util.Map; /** * VirtualMachine describes the properties held by a virtual machine * */ -public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, InternalIdentity, Displayable, StateObject { +public interface VirtualMachine extends RunningOn, ControlledEntity, Partition, Displayable, StateObject { public enum PowerState { PowerUnknown, diff --git a/api/src/org/apache/cloudstack/alert/AlertService.java b/api/src/org/apache/cloudstack/alert/AlertService.java index 841296996ea..26c3f3cf3ab 100644 --- a/api/src/org/apache/cloudstack/alert/AlertService.java +++ b/api/src/org/apache/cloudstack/alert/AlertService.java @@ -16,12 +16,12 @@ // under the License. package org.apache.cloudstack.alert; -import java.util.HashSet; -import java.util.Set; - import com.cloud.capacity.Capacity; import com.cloud.exception.InvalidParameterValueException; +import java.util.HashSet; +import java.util.Set; + public interface AlertService { public static class AlertType { private static Set defaultAlertTypes = new HashSet(); @@ -67,6 +67,7 @@ public interface AlertService { public static final AlertType ALERT_TYPE_SYNC = new AlertType((short)27, "ALERT.TYPE.SYNC", true); public static final AlertType ALERT_TYPE_UPLOAD_FAILED = new AlertType((short)28, "ALERT.UPLOAD.FAILED", true); public static final AlertType ALERT_TYPE_OOBM_AUTH_ERROR = new AlertType((short)29, "ALERT.OOBM.AUTHERROR", true); + public static final AlertType ALERT_TYPE_HA_ACTION = new AlertType((short)30, "ALERT.HA.ACTION", true); public static final AlertType ALERT_TYPE_CA_CERT = new AlertType((short)31, "ALERT.CA.CERT", true); public short getType() { diff --git a/api/src/org/apache/cloudstack/api/ApiConstants.java b/api/src/org/apache/cloudstack/api/ApiConstants.java index da39ff84fae..88ade5c1656 100644 --- a/api/src/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/org/apache/cloudstack/api/ApiConstants.java @@ -21,6 +21,7 @@ public class ApiConstants { public static final String ACCOUNTS = "accounts"; public static final String ACCOUNT_TYPE = "accounttype"; public static final String ACCOUNT_ID = "accountid"; + public static final String ACTIVITY = "activity"; public static final String ADDRESS = "address"; public static final String ALGORITHM = "algorithm"; public static final String ALLOCATED_ONLY = "allocatedonly"; @@ -98,6 +99,7 @@ public class ApiConstants { public static final String DOMAIN_ID = "domainid"; public static final String DOMAIN__ID = "domainId"; public static final String DURATION = "duration"; + public static final String ELIGIBLE = "eligible"; public static final String EMAIL = "email"; public static final String END_DATE = "enddate"; public static final String END_IP = "endip"; @@ -105,6 +107,7 @@ public class ApiConstants { public static final String 
END_PORT = "endport"; public static final String ENTRY_TIME = "entrytime"; public static final String EXPIRES = "expires"; + public static final String FENCE = "fence"; public static final String FETCH_LATEST = "fetchlatest"; public static final String FIRSTNAME = "firstname"; public static final String FORCED = "forced"; @@ -123,6 +126,9 @@ public class ApiConstants { public static final String GUEST_CIDR_ADDRESS = "guestcidraddress"; public static final String GUEST_VLAN_RANGE = "guestvlanrange"; public static final String HA_ENABLE = "haenable"; + public static final String HA_PROVIDER = "haprovider"; + public static final String HA_STATE = "hastate"; + public static final String HEALTH = "health"; public static final String HOST_ID = "hostid"; public static final String HOST_NAME = "hostname"; public static final String HYPERVISOR = "hypervisor"; @@ -220,6 +226,7 @@ public class ApiConstants { public static final String PUBLIC_ZONE = "publiczone"; public static final String RECEIVED_BYTES = "receivedbytes"; public static final String RECONNECT = "reconnect"; + public static final String RECOVER = "recover"; public static final String REQUIRES_HVM = "requireshvm"; public static final String RESOURCE_TYPE = "resourcetype"; public static final String RESPONSE = "response"; diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java new file mode 100644 index 00000000000..f85dbb23504 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ConfigureHAForHostCmd.java @@ -0,0 +1,127 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; + +@APICommand(name = ConfigureHAForHostCmd.APINAME, description = "Configures HA for a host", + responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class ConfigureHAForHostCmd extends BaseAsyncCmd { + public static final String APINAME = "configureHAForHost"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + @Parameter(name = ApiConstants.PROVIDER, type = CommandType.STRING, + description = "HA provider", required = true, validations = {ApiArgValidator.NotNullOrEmpty}) + private String haProvider; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + public String getHaProvider() { + return haProvider; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + private void setupResponse(final boolean result, final String resourceUuid) { + final HostHAResponse response = new HostHAResponse(); + response.setId(resourceUuid); + response.setProvider(getHaProvider().toLowerCase()); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = _resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + 
+ final boolean result = haConfigManager.configureHA(host.getId(), HAResource.ResourceType.Host, getHaProvider()); + if (!result) { + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to configure HA provider for the host"); + } + CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA configured with provider: " + getHaProvider()); + CallContext.current().putContextParameter(Host.class, host.getUuid()); + + setupResponse(result, host.getUuid()); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_CONFIGURE; + } + + @Override + public String getEventDescription() { + return "configure HA for host: " + getHostId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java new file mode 100644 index 00000000000..053c978b831 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForClusterCmd.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.org.Cluster; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.ClusterResponse; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import javax.inject.Inject; + +@APICommand(name = DisableHAForClusterCmd.APINAME, description = "Disables HA cluster-wide", + responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class DisableHAForClusterCmd extends BaseAsyncCmd { + public static final String APINAME = "disableHAForCluster"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.CLUSTER_ID, type = BaseCmd.CommandType.UUID, entityType = ClusterResponse.class, + description = "ID of the cluster", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long clusterId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getClusterId() { + return clusterId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result) { + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(result); + response.setResponseName(getCommandName()); + response.setObjectName("ha"); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Cluster cluster = _resourceService.getCluster(getClusterId()); + if (cluster == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find cluster by ID: " + getClusterId()); + } + final boolean result = haConfigManager.disableHA(cluster); + CallContext.current().setEventDetails("Cluster Id:" + cluster.getId() + " HA enabled: false"); + CallContext.current().putContextParameter(Cluster.class, cluster.getUuid()); + + setupResponse(result); + } + + @Override + public String 
getEventType() { + return EventTypes.EVENT_HA_RESOURCE_DISABLE; + } + + @Override + public String getEventDescription() { + return "disable HA for cluster: " + getClusterId(); + } + +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java new file mode 100644 index 00000000000..87ebe878ceb --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForHostCmd.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; + +@APICommand(name = DisableHAForHostCmd.APINAME, description = "Disables HA for a host", + responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class DisableHAForHostCmd extends BaseAsyncCmd { + public static final String APINAME = "disableHAForHost"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + 
///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result, final String resourceUuid) { + final HostHAResponse response = new HostHAResponse(); + response.setId(resourceUuid); + response.setEnabled(false); + response.setStatus(result); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = _resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + + final boolean result = haConfigManager.disableHA(host.getId(), HAResource.ResourceType.Host); + CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA enabled: false"); + CallContext.current().putContextParameter(Host.class, host.getUuid()); + + setupResponse(result, host.getUuid()); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_DISABLE; + } + + @Override + public String getEventDescription() { + return "disable HA for host: " + getHostId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java new file mode 100644 index 00000000000..845c4a663b5 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/DisableHAForZoneCmd.java @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.dc.DataCenter; +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.api.response.ZoneResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import javax.inject.Inject; + +@APICommand(name = DisableHAForZoneCmd.APINAME, description = "Disables HA for a zone", + responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class DisableHAForZoneCmd extends BaseAsyncCmd { + public static final String APINAME = "disableHAForZone"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ZONE_ID, type = BaseCmd.CommandType.UUID, entityType = ZoneResponse.class, + description = "ID of the zone", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long zoneId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getZoneId() { + return zoneId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result) { + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(result); + response.setResponseName(getCommandName()); + response.setObjectName("ha"); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final DataCenter dataCenter = _resourceService.getZone(getZoneId()); + if (dataCenter == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find zone by ID: " + getZoneId()); + } + + final boolean result = haConfigManager.disableHA(dataCenter); + CallContext.current().setEventDetails("Zone Id:" + dataCenter.getId() + " HA enabled: false"); + CallContext.current().putContextParameter(DataCenter.class, dataCenter.getUuid()); + + setupResponse(result); + } + + @Override + public String getEventType() { + return 
EventTypes.EVENT_HA_RESOURCE_DISABLE; + } + + @Override + public String getEventDescription() { + return "disable HA for zone: " + getZoneId(); + } + +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java new file mode 100644 index 00000000000..e06d0d2c1b0 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForClusterCmd.java @@ -0,0 +1,114 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.org.Cluster; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.ClusterResponse; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import javax.inject.Inject; + +@APICommand(name = EnableHAForClusterCmd.APINAME, description = "Enables HA cluster-wide", + responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class EnableHAForClusterCmd extends BaseAsyncCmd { + public static final String APINAME = "enableHAForCluster"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.CLUSTER_ID, type = BaseCmd.CommandType.UUID, entityType = ClusterResponse.class, + description = "ID of the cluster", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long clusterId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getClusterId() { + return clusterId; + } + + ///////////////////////////////////////////////////// + /////////////// API 
Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result) { + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(result); + response.setResponseName(getCommandName()); + response.setObjectName("ha"); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Cluster cluster = _resourceService.getCluster(getClusterId()); + if (cluster == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find cluster by ID: " + getClusterId()); + } + + final boolean result = haConfigManager.enableHA(cluster); + CallContext.current().setEventDetails("Cluster Id:" + cluster.getId() + " HA enabled: true"); + CallContext.current().putContextParameter(Cluster.class, cluster.getUuid()); + + setupResponse(result); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_ENABLE; + } + + @Override + public String getEventDescription() { + return "enable HA for cluster: " + getClusterId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java new file mode 100644 index 00000000000..b23841ad56f --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForHostCmd.java @@ -0,0 +1,116 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; + +@APICommand(name = EnableHAForHostCmd.APINAME, description = "Enables HA for a host", + responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class EnableHAForHostCmd extends BaseAsyncCmd { + public static final String APINAME = "enableHAForHost"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result, final String resourceUuid) { + final HostHAResponse response = new HostHAResponse(); + response.setId(resourceUuid); + response.setEnabled(true); + response.setStatus(result); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = _resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + final boolean result = haConfigManager.enableHA(host.getId(), HAResource.ResourceType.Host); + + CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA enabled: true"); + CallContext.current().putContextParameter(Host.class, host.getUuid()); + + 
setupResponse(result, host.getUuid()); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_ENABLE; + } + + @Override + public String getEventDescription() { + return "enable HA for host: " + getHostId(); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java new file mode 100644 index 00000000000..443d303c296 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/EnableHAForZoneCmd.java @@ -0,0 +1,115 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.dc.DataCenter; +import com.cloud.event.EventTypes; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseAsyncCmd; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.SuccessResponse; +import org.apache.cloudstack.api.response.ZoneResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAConfigManager; + +import javax.inject.Inject; + +@APICommand(name = EnableHAForZoneCmd.APINAME, description = "Enables HA for a zone", + responseObject = SuccessResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class EnableHAForZoneCmd extends BaseAsyncCmd { + public static final String APINAME = "enableHAForZone"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ZONE_ID, type = BaseCmd.CommandType.UUID, entityType = ZoneResponse.class, + description = "ID of the zone", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long zoneId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getZoneId() { + return zoneId; + } + + 
///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } + + private void setupResponse(final boolean result) { + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(result); + response.setResponseName(getCommandName()); + response.setObjectName("ha"); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final DataCenter dataCenter = _resourceService.getZone(getZoneId()); + if (dataCenter == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find zone by ID: " + getZoneId()); + } + + final boolean result = haConfigManager.enableHA(dataCenter); + CallContext.current().setEventDetails("Zone Id:" + dataCenter.getId() + " HA enabled: true"); + CallContext.current().putContextParameter(DataCenter.class, dataCenter.getUuid()); + + setupResponse(result); + } + + @Override + public String getEventType() { + return EventTypes.EVENT_HA_RESOURCE_ENABLE; + } + + @Override + public String getEventDescription() { + return "enable HA for zone: " + getZoneId(); + } + +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java new file mode 100644 index 00000000000..64b9a6a8e5c --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAProvidersCmd.java @@ -0,0 +1,106 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.user.Account; +import com.google.common.base.Enums; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HAProviderResponse; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +@APICommand(name = ListHostHAProvidersCmd.APINAME, description = "Lists HA providers", responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class ListHostHAProvidersCmd extends BaseCmd { + public static final String APINAME = "listHostHAProviders"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, required = true, + description = "Hypervisor type of the resource", validations = {ApiArgValidator.NotNullOrEmpty}) + private String hypervisorType; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public HAResource.ResourceSubType getHypervisorType() { + return HAResource.ResourceSubType.valueOf(hypervisorType); + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + private void setupResponse(final List<String> hostHAProviderList) { + final ListResponse<HAProviderResponse> response = new ListResponse<>(); + final List<HAProviderResponse> hostHAResponses = new ArrayList<>(); + for (final String provider : hostHAProviderList) { + final HAProviderResponse haProviderResponse = new HAProviderResponse(); + haProviderResponse.setProvider(provider); + hostHAResponses.add(haProviderResponse); + } + response.setResponses(hostHAResponses); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + if (!Enums.getIfPresent(HAResource.ResourceSubType.class, hypervisorType).isPresent()) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Invalid or 
unsupported host hypervisor type provided. Supported types are: " + Arrays.toString(HAResource.ResourceSubType.values())); + } + final List<String> hostHAProviders = haConfigManager.listHAProviders(HAResource.ResourceType.Host, getHypervisorType()); + setupResponse(hostHAProviders); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java new file mode 100644 index 00000000000..75a900c2198 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/command/admin/ha/ListHostHAResourcesCmd.java @@ -0,0 +1,109 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.ha; + +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import com.cloud.user.Account; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostHAResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAConfigManager; +import org.apache.cloudstack.ha.HAResource; + +import javax.inject.Inject; +import java.util.ArrayList; +import java.util.List; + +@APICommand(name = ListHostHAResourcesCmd.APINAME, description = "Lists host HA resources", responseObject = HostHAResponse.class, + requestHasSensitiveInfo = false, responseHasSensitiveInfo = false, + since = "4.11", authorized = {RoleType.Admin}) +public final class ListHostHAResourcesCmd extends BaseCmd { + public static final String APINAME = "listHostHAResources"; + + @Inject + private HAConfigManager haConfigManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class, + description = "List by host ID", validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + 
public Long getHostId() { + return hostId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return Account.ACCOUNT_ID_SYSTEM; + } + + private void setupResponse(final List<HAConfig> hostHAConfigList) { + final ListResponse<HostHAResponse> response = new ListResponse<>(); + final List<HostHAResponse> hostHAResponses = new ArrayList<>(); + for (final HAConfig config : hostHAConfigList) { + final Host host = _resourceService.getHost(config.getResourceId()); + if (host == null) { + continue; + } + final HostHAResponse hostHAResponse = new HostHAResponse(); + hostHAResponse.setId(host.getUuid()); + hostHAResponse.setEnabled(config.isEnabled()); + hostHAResponse.setHaState(config.getState()); + hostHAResponse.setProvider(config.getHaProvider()); + hostHAResponses.add(hostHAResponse); + } + response.setResponses(hostHAResponses); + response.setResponseName(getCommandName()); + setResponseObject(response); + } + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final List<HAConfig> hostHAConfig = haConfigManager.listHAResources(getHostId(), HAResource.ResourceType.Host); + setupResponse(hostHAConfig); + } +} diff --git a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java index e49aabc49d4..aa7cfed1e8f 100644 --- a/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ b/api/src/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java @@ -108,4 +108,8 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd { throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to prepare host for maintenance"); } } + + public void setHostId(final Long hostId) { + id = hostId; + } } diff --git a/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java b/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java new file mode 100644 index 00000000000..d75cbc3e120 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/response/HAProviderResponse.java @@ -0,0 +1,58 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.response; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.api.EntityReference; +import org.apache.cloudstack.ha.HAConfig; + +import java.util.List; + +@EntityReference(value = HAConfig.class) +public final class HAProviderResponse extends BaseResponse { + @SerializedName(ApiConstants.HA_PROVIDER) + @Param(description = "the HA provider") + private String provider; + + @SerializedName(ApiConstants.TYPE) + @Param(description = "the HA provider resource type detail") + private List supportedResourceTypes; + + public HAProviderResponse() { + super("haprovider"); + } + + public String getProvider() { + return provider; + } + + public void setProvider(String provider) { + this.provider = provider; + } + + public List getSupportedResourceTypes() { + return supportedResourceTypes; + } + + public void setSupportedResourceTypes(List supportedResourceTypes) { + this.supportedResourceTypes = supportedResourceTypes; + } +} diff --git a/api/src/org/apache/cloudstack/api/response/HostHAResponse.java b/api/src/org/apache/cloudstack/api/response/HostHAResponse.java new file mode 100644 index 00000000000..942250cbc21 --- /dev/null +++ b/api/src/org/apache/cloudstack/api/response/HostHAResponse.java @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
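
Reviewer sketch (illustrative only, not part of the patch): how the provider-listing command earlier in this change could populate HAProviderResponse objects from HAConfigManager.listHAProviders(). The helper class is hypothetical and it assumes the manager returns plain provider name strings.

    import org.apache.cloudstack.api.response.HAProviderResponse;
    import org.apache.cloudstack.ha.HAConfigManager;
    import org.apache.cloudstack.ha.HAResource;

    import java.util.ArrayList;
    import java.util.List;

    public class HAProviderListingSketch {
        // Illustrative helper: wrap each provider name reported by the manager
        // in an HAProviderResponse, as the listHostHAProviders command does
        // before returning its ListResponse.
        public static List<HAProviderResponse> toResponses(final HAConfigManager haConfigManager) {
            final List<HAProviderResponse> responses = new ArrayList<>();
            for (final String name : haConfigManager.listHAProviders(
                    HAResource.ResourceType.Host, HAResource.ResourceSubType.KVM)) {
                final HAProviderResponse response = new HAProviderResponse();
                response.setProvider(name);
                responses.add(response);
            }
            return responses;
        }
    }
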
+ +package org.apache.cloudstack.api.response; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.api.EntityReference; +import org.apache.cloudstack.ha.HAConfig; + +@EntityReference(value = HAConfig.class) +public final class HostHAResponse extends BaseResponse { + @SerializedName(ApiConstants.HOST_ID) + @Param(description = "the ID of the host") + private String id; + + @SerializedName(ApiConstants.HA_ENABLE) + @Param(description = "if host HA is enabled for the host") + private Boolean enabled; + + @SerializedName(ApiConstants.HA_STATE) + @Param(description = "the HA state of the host") + private HAConfig.HAState haState; + + @SerializedName(ApiConstants.HA_PROVIDER) + @Param(description = "the host HA provider") + private String provider; + + @SerializedName(ApiConstants.STATUS) + @Param(description = "operation status") + private Boolean status; + + public HostHAResponse() { + super("hostha"); + } + + public HostHAResponse(final HAConfig config) { + this(); + if (config == null) { + this.enabled = false; + this.haState = HAConfig.HAState.Disabled; + return; + } + setProvider(config.getHaProvider()); + setEnabled(config.isEnabled()); + setHaState(config.getState()); + } + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public Boolean getEnabled() { + return enabled; + } + + public void setEnabled(Boolean enabled) { + this.enabled = enabled; + } + + public HAConfig.HAState getHaState() { + return haState; + } + + public void setHaState(HAConfig.HAState haState) { + this.haState = haState; + } + + public String getProvider() { + return provider; + } + + public void setProvider(String provider) { + this.provider = provider; + } + + public Boolean getStatus() { + return status; + } + + public void setStatus(Boolean status) { + this.status = status; + } +} diff --git a/api/src/org/apache/cloudstack/api/response/HostResponse.java b/api/src/org/apache/cloudstack/api/response/HostResponse.java index 1a3a80ce0bf..91cb8058813 100644 --- a/api/src/org/apache/cloudstack/api/response/HostResponse.java +++ b/api/src/org/apache/cloudstack/api/response/HostResponse.java @@ -24,6 +24,7 @@ import com.google.gson.annotations.SerializedName; import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; import org.apache.cloudstack.api.EntityReference; +import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement; import java.util.Date; @@ -206,6 +207,10 @@ public class HostResponse extends BaseResponse { @Param(description = "true if this host is suitable(has enough capacity and satisfies all conditions like hosttags, max guests vm limit etc) to migrate a VM to it , false otherwise") private Boolean suitableForMigration; + @SerializedName("hostha") + @Param(description = "the host HA information information") + private HostHAResponse hostHAResponse; + @SerializedName("outofbandmanagement") @Param(description = "the host out-of-band management information") private OutOfBandManagementResponse outOfBandManagementResponse; @@ -413,6 +418,14 @@ public class HostResponse extends BaseResponse { this.suitableForMigration = suitableForMigration; } + public HostHAResponse getHostHAResponse() { + return hostHAResponse; + } + + public void setHostHAResponse(final HAConfig config) { + this.hostHAResponse = new 
HostHAResponse(config); + } + public OutOfBandManagementResponse getOutOfBandManagementResponse() { return outOfBandManagementResponse; } diff --git a/api/src/org/apache/cloudstack/ha/HAConfig.java b/api/src/org/apache/cloudstack/ha/HAConfig.java new file mode 100644 index 00000000000..36fe11c410d --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAConfig.java @@ -0,0 +1,142 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package org.apache.cloudstack.ha; + +import com.cloud.utils.fsm.StateMachine2; +import com.cloud.utils.fsm.StateObject; +import org.apache.cloudstack.api.InternalIdentity; +import org.apache.cloudstack.utils.identity.ManagementServerNode; + +public interface HAConfig extends StateObject, InternalIdentity { + + long getResourceId(); + HAResource.ResourceType getResourceType(); + boolean isEnabled(); + HAState getState(); + String getHaProvider(); + Long getManagementServerId(); + + enum Event { + Eligible, + Ineligible, + Disabled, + Enabled, + HealthCheckPassed, + HealthCheckFailed, + PerformActivityCheck, + TooFewActivityCheckSamples, + PeriodicRecheckResourceActivity, + ActivityCheckFailureOverThresholdRatio, + ActivityCheckFailureUnderThresholdRatio, + PowerCycle, + Recovered, + RecoveryWaitPeriodTimeout, + RecoveryOperationThresholdExceeded, + Fenced; + + public Long getServerId() { + // TODO: change in future if we've better claim & ownership + // Right now the first one to update the db wins + // and mgmt server id would eventually become consistent + return ManagementServerNode.getManagementServerId(); + } + } + + enum HAState { + Disabled("HA Operations disabled"), + Available("The resource is healthy"), + Ineligible("The current state does not support HA/recovery"), + Suspect("Most recent health check failed"), + Degraded("The resource cannot be managed, but services end user requests"), + Checking("The activity checks are currently being performed"), + Recovering("The resource is undergoing recovery operation"), + Recovered("The resource is recovered"), + Fencing("The resource is undergoing fence operation"), + Fenced("The resource is fenced"); + + String description; + + HAState(String description) { + this.description = description; + } + + public static StateMachine2 getStateMachine() { + return FSM; + } + + public String getDescription() { + return description; + } + + private static final StateMachine2 FSM = new StateMachine2<>(); + + static { + FSM.addInitialTransition(Event.Disabled, Disabled); + FSM.addInitialTransition(Event.Enabled, Available); + FSM.addInitialTransition(Event.Ineligible, Ineligible); + + FSM.addTransition(Disabled, Event.Enabled, Available); + + FSM.addTransition(Ineligible, Event.Disabled, Disabled); + FSM.addTransition(Ineligible, Event.Ineligible, Ineligible); + 
FSM.addTransition(Ineligible, Event.Eligible, Available); + + FSM.addTransition(Available, Event.Disabled, Disabled); + FSM.addTransition(Available, Event.Ineligible, Ineligible); + FSM.addTransition(Available, Event.HealthCheckPassed, Available); + FSM.addTransition(Available, Event.HealthCheckFailed, Suspect); + + FSM.addTransition(Suspect, Event.Disabled, Disabled); + FSM.addTransition(Suspect, Event.Ineligible, Ineligible); + FSM.addTransition(Suspect, Event.HealthCheckFailed, Suspect); + FSM.addTransition(Suspect, Event.PerformActivityCheck, Checking); + FSM.addTransition(Suspect, Event.HealthCheckPassed, Available); + + FSM.addTransition(Checking, Event.Disabled, Disabled); + FSM.addTransition(Checking, Event.Ineligible, Ineligible); + FSM.addTransition(Checking, Event.TooFewActivityCheckSamples, Suspect); + FSM.addTransition(Checking, Event.ActivityCheckFailureUnderThresholdRatio, Degraded); + FSM.addTransition(Checking, Event.ActivityCheckFailureOverThresholdRatio, Recovering); + + FSM.addTransition(Degraded, Event.Disabled, Disabled); + FSM.addTransition(Degraded, Event.Ineligible, Ineligible); + FSM.addTransition(Degraded, Event.HealthCheckFailed, Degraded); + FSM.addTransition(Degraded, Event.HealthCheckPassed, Available); + FSM.addTransition(Degraded, Event.PeriodicRecheckResourceActivity, Suspect); + + FSM.addTransition(Recovering, Event.Disabled, Disabled); + FSM.addTransition(Recovering, Event.Ineligible, Ineligible); + FSM.addTransition(Recovering, Event.Recovered, Recovered); + FSM.addTransition(Recovering, Event.RecoveryOperationThresholdExceeded, Fencing); + + FSM.addTransition(Recovered, Event.Disabled, Disabled); + FSM.addTransition(Recovered, Event.Ineligible, Ineligible); + FSM.addTransition(Recovered, Event.RecoveryWaitPeriodTimeout, Available); + + FSM.addTransition(Fencing, Event.Disabled, Disabled); + FSM.addTransition(Fencing, Event.Ineligible, Ineligible); + FSM.addTransition(Fencing, Event.Fenced, Fenced); + + FSM.addTransition(Fenced, Event.Disabled, Disabled); + FSM.addTransition(Fenced, Event.HealthCheckPassed, Ineligible); + FSM.addTransition(Fenced, Event.HealthCheckFailed, Fenced); + } + } +} diff --git a/api/src/org/apache/cloudstack/ha/HAConfigManager.java b/api/src/org/apache/cloudstack/ha/HAConfigManager.java new file mode 100644 index 00000000000..c9a20358c36 --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAConfigManager.java @@ -0,0 +1,96 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
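
To make the HAState machine above easier to review, a small illustrative walk through a failure and recovery cycle follows. It is a sketch, not part of the patch, and it assumes StateMachine2 exposes getNextState(currentState, event) throwing NoTransitionException, as the FSM utility does elsewhere in the codebase.

    import com.cloud.utils.fsm.NoTransitionException;
    import org.apache.cloudstack.ha.HAConfig.Event;
    import org.apache.cloudstack.ha.HAConfig.HAState;

    public class HAStateWalkSketch {
        public static void main(final String[] args) throws NoTransitionException {
            // Available -> Suspect -> Checking -> Recovering -> Recovered -> Available
            HAState state = HAState.Available;
            state = HAState.getStateMachine().getNextState(state, Event.HealthCheckFailed);
            state = HAState.getStateMachine().getNextState(state, Event.PerformActivityCheck);
            state = HAState.getStateMachine().getNextState(state, Event.ActivityCheckFailureOverThresholdRatio);
            state = HAState.getStateMachine().getNextState(state, Event.Recovered);
            state = HAState.getStateMachine().getNextState(state, Event.RecoveryWaitPeriodTimeout);
            System.out.println("Final HA state: " + state); // expected: Available
        }
    }
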
+ +package org.apache.cloudstack.ha; + +import com.cloud.dc.DataCenter; +import com.cloud.org.Cluster; + +import java.util.List; + +/** + * @since 4.11 + */ +public interface HAConfigManager { + /** + * Configures HA for a resource by accepting the resource type and HA provider + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @param haProvider the name of the HA provider + */ + boolean configureHA(Long resourceId, HAResource.ResourceType resourceType, String haProvider); + + /** + * Enables HA for resource Id of a specific resource type + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @return returns true on successful enable + */ + boolean enableHA(Long resourceId, HAResource.ResourceType resourceType); + + /** + * Disables HA for resource Id of a specific resource type + * @param resourceId the ID of the resource + * @param resourceType the type of the resource + * @return returns true on successful disable + */ + boolean disableHA(Long resourceId, HAResource.ResourceType resourceType); + + /** + * Enables HA across a cluster + * @param cluster the cluster + * @return returns operation success + */ + boolean enableHA(final Cluster cluster); + + /** + * Disables HA across a cluster + * @param cluster the cluster + * @return returns operation success + */ + boolean disableHA(final Cluster cluster); + + /** + * Enables HA across a zone + * @param zone the zone + * @return returns operation success + */ + boolean enableHA(final DataCenter zone); + + /** + * Disables HA across a zone + * @param zone the zone + * @return returns operation success + */ + boolean disableHA(final DataCenter zone); + + /** + * Returns list of HA config for resources, by resource ID and/or type if provided + * @param resourceId (optional) ID of the resource + * @param resourceType (optional) type of the resource + * @return returns list of ha config for the resource + */ + List listHAResources(final Long resourceId, final HAResource.ResourceType resourceType); + + /** + * Returns list of HA providers for resources + * @param resourceType type of the resource + * @param entityType sub-type of the resource + * @return returns list of ha provider names + */ + List listHAProviders(final HAResource.ResourceType resourceType, final HAResource.ResourceSubType entityType); +} diff --git a/api/src/org/apache/cloudstack/ha/HAResource.java b/api/src/org/apache/cloudstack/ha/HAResource.java new file mode 100644 index 00000000000..650a58b3297 --- /dev/null +++ b/api/src/org/apache/cloudstack/ha/HAResource.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
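
A hedged sketch of how a caller, for example the API command layer, might drive the HAConfigManager contract above: configure a provider for a host, switch HA on, then read the stored configuration back. The helper class and method below are hypothetical.

    import org.apache.cloudstack.ha.HAConfig;
    import org.apache.cloudstack.ha.HAConfigManager;
    import org.apache.cloudstack.ha.HAResource;

    import javax.inject.Inject;
    import java.util.List;

    public class HostHAOrchestrationSketch {
        @Inject
        private HAConfigManager haConfigManager;

        // Hypothetical flow: pick a provider for the host, enable HA,
        // and return whatever configuration the manager now holds for it.
        public List<HAConfig> enableHostHA(final long hostId, final String haProvider) {
            haConfigManager.configureHA(hostId, HAResource.ResourceType.Host, haProvider);
            haConfigManager.enableHA(hostId, HAResource.ResourceType.Host);
            return haConfigManager.listHAResources(hostId, HAResource.ResourceType.Host);
        }
    }
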
+ +package org.apache.cloudstack.ha; + +import org.apache.cloudstack.kernel.Partition; + +public interface HAResource extends Partition { + enum ResourceType { + Host, + VirtualMachine + } + + enum ResourceSubType { + KVM, + Simulator, + Unknown + } + + long getDataCenterId(); + Long getClusterId(); + ResourceType resourceType(); +} diff --git a/api/src/org/apache/cloudstack/kernel/Partition.java b/api/src/org/apache/cloudstack/kernel/Partition.java new file mode 100644 index 00000000000..6d93e3e23ce --- /dev/null +++ b/api/src/org/apache/cloudstack/kernel/Partition.java @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.kernel; + +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.api.InternalIdentity; + +public interface Partition extends InternalIdentity, Identity { + enum PartitionType { + Zone, Pod, Cluster, Host, VM + } + PartitionType partitionType(); +} diff --git a/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml b/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml index 293834eda8a..fb0e8780ecc 100644 --- a/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml +++ b/core/resources/META-INF/cloudstack/compute/spring-core-lifecycle-compute-context-inheritable.xml @@ -44,4 +44,9 @@ + + + + + diff --git a/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml b/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml index d5b912ac892..9020caef7b8 100644 --- a/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml +++ b/core/resources/META-INF/cloudstack/core/spring-core-registry-core-context.xml @@ -307,6 +307,11 @@ + + + + diff --git a/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java b/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java new file mode 100644 index 00000000000..b053f2895d2 --- /dev/null +++ b/core/src/com/cloud/agent/api/CheckVMActivityOnStoragePoolCommand.java @@ -0,0 +1,70 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.agent.api; + +import com.cloud.agent.api.to.HostTO; +import com.cloud.agent.api.to.StorageFilerTO; +import com.cloud.host.Host; +import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; + +import org.joda.time.DateTime; +import java.util.List; + +public final class CheckVMActivityOnStoragePoolCommand extends Command { + + private HostTO host; + private StorageFilerTO pool; + private String volumeList; + private long suspectTimeSeconds; + + public CheckVMActivityOnStoragePoolCommand(final Host host, final StoragePool pool, final List volumeList, final DateTime suspectTime) { + this.host = new HostTO(host); + this.pool = new StorageFilerTO(pool); + this.suspectTimeSeconds = suspectTime.getMillis()/1000L; + final StringBuilder stringBuilder = new StringBuilder(); + for (final Volume v : volumeList) { + stringBuilder.append(v.getUuid()).append(","); + } + + this.volumeList = stringBuilder.deleteCharAt(stringBuilder.length() - 1).toString(); + } + + public String getVolumeList() { + return volumeList; + } + + public StorageFilerTO getPool() { + return pool; + } + + public HostTO getHost() { + return host; + } + + public long getSuspectTimeInSeconds() { + return suspectTimeSeconds; + } + + @Override + public boolean executeInSequence() { + return false; + } +} diff --git a/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java b/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java index 72ec83a41f7..3cba4929a57 100644 --- a/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java +++ b/core/test/org/apache/cloudstack/api/agent/test/CheckOnHostCommandTest.java @@ -27,6 +27,8 @@ import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.Date; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.kernel.Partition; import org.junit.Test; import com.cloud.agent.api.CheckOnHostCommand; @@ -38,6 +40,11 @@ import com.cloud.resource.ResourceState; public class CheckOnHostCommandTest { public Host host = new Host() { + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } + @Override public Status getState() { return Status.Up; @@ -197,7 +204,12 @@ public class CheckOnHostCommandTest { @Override public Long getClusterId() { return 3L; - }; + } + + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } @Override public String getPublicIpAddress() { @@ -254,6 +266,11 @@ public class CheckOnHostCommandTest { return false; }; + @Override + public boolean isDisabled() { + return false; + }; + @Override public ResourceState getResourceState() { return ResourceState.Enabled; diff --git a/engine/components-api/src/com/cloud/agent/AgentManager.java b/engine/components-api/src/com/cloud/agent/AgentManager.java index 244772d67d0..933c3eaef99 100644 --- a/engine/components-api/src/com/cloud/agent/AgentManager.java +++ b/engine/components-api/src/com/cloud/agent/AgentManager.java @@ -137,6 +137,8 @@ public interface AgentManager { void disconnectWithoutInvestigation(long hostId, Status.Event event); + 
void disconnectWithInvestigation(long hostId, Status.Event event); + public void pullAgentToMaintenance(long hostId); public void pullAgentOutMaintenance(long hostId); diff --git a/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java index faf3a3bfcd4..ea71a34dbb1 100644 --- a/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java +++ b/engine/orchestration/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java @@ -49,6 +49,7 @@ import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.managed.context.ManagedContextTimerTask; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.security.SSLUtils; +import org.apache.cloudstack.ha.dao.HAConfigDao; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import org.apache.log4j.Logger; @@ -123,6 +124,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust ConfigDepot _configDepot; @Inject private OutOfBandManagementDao outOfBandManagementDao; + @Inject + private HAConfigDao haConfigDao; protected ClusteredAgentManagerImpl() { super(); @@ -744,6 +747,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust final long lastPing = (System.currentTimeMillis() >> 10) - getTimeout(); _hostDao.markHostsAsDisconnected(vo.getMsid(), lastPing); outOfBandManagementDao.expireServerOwnership(vo.getMsid()); + haConfigDao.expireServerOwnership(vo.getMsid()); s_logger.info("Deleting entries from op_host_transfer table for Management server " + vo.getMsid()); cleanupTransferMap(vo.getMsid()); } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java index 0c34e3c5285..75965fe4bd1 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineClusterVO.java @@ -16,8 +16,16 @@ // under the License. 
package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.org.Cluster; +import com.cloud.org.Grouping; +import com.cloud.org.Managed.ManagedState; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.Entity; @@ -29,18 +37,8 @@ import javax.persistence.Id; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.org.Cluster; -import com.cloud.org.Grouping; -import com.cloud.org.Managed.ManagedState; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.UUID; @Entity @Table(name = "cluster") @@ -243,4 +241,9 @@ public class EngineClusterVO implements EngineCluster, Identity { public State getState() { return state; } + + @Override + public PartitionType partitionType() { + return PartitionType.Cluster; + } } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java index ca9ad50b3c1..ba967be6210 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineDataCenterVO.java @@ -16,9 +16,14 @@ // under the License. 
package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.network.Network.Provider; +import com.cloud.org.Grouping; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.Entity; @@ -32,16 +37,9 @@ import javax.persistence.TableGenerator; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.network.Network.Provider; -import com.cloud.org.Grouping; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "data_center") @@ -501,4 +499,9 @@ public class EngineDataCenterVO implements EngineDataCenter, Identity { public void setIp6Dns2(String ip6Dns2) { this.ip6Dns2 = ip6Dns2; } + + @Override + public PartitionType partitionType() { + return PartitionType.Zone; + } } diff --git a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java index efa34dfec98..be1484f0bde 100644 --- a/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java +++ b/engine/orchestration/src/org/apache/cloudstack/engine/datacenter/entity/api/db/EngineHostVO.java @@ -16,10 +16,16 @@ // under the License. 
package org.apache.cloudstack.engine.datacenter.entity.api.db; -import java.util.Date; -import java.util.List; -import java.util.Map; -import java.util.UUID; +import com.cloud.host.Status; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.resource.ResourceState; +import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import org.apache.cloudstack.api.Identity; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; +import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -36,18 +42,10 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.cloudstack.api.Identity; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State; -import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event; - -import com.cloud.host.Status; -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.resource.ResourceState; -import com.cloud.storage.Storage.StoragePoolType; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "host") @@ -178,6 +176,11 @@ public class EngineHostVO implements EngineHost, Identity { return clusterId; } + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } + public void setClusterId(Long clusterId) { this.clusterId = clusterId; } @@ -720,6 +723,11 @@ public class EngineHostVO implements EngineHost, Identity { resourceState = state; } + @Override + public boolean isDisabled() { + return (getResourceState() == ResourceState.Disabled); + } + @Override public boolean isInMaintenanceStates() { return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance); @@ -758,4 +766,9 @@ public class EngineHostVO implements EngineHost, Identity { public State getOrchestrationState() { return orchestrationState; } + + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } } diff --git a/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml b/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml index e8d6633d163..654bca99920 100644 --- a/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml +++ b/engine/schema/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml @@ -169,6 +169,8 @@ + + diff --git a/engine/schema/src/com/cloud/dc/ClusterVO.java b/engine/schema/src/com/cloud/dc/ClusterVO.java index b701542fbdb..2a76789136a 100644 --- a/engine/schema/src/com/cloud/dc/ClusterVO.java +++ b/engine/schema/src/com/cloud/dc/ClusterVO.java @@ -16,8 +16,12 @@ // under the License. 
package com.cloud.dc; -import java.util.Date; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.org.Cluster; +import com.cloud.org.Grouping; +import com.cloud.org.Managed.ManagedState; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.Entity; @@ -27,13 +31,8 @@ import javax.persistence.GeneratedValue; import javax.persistence.GenerationType; import javax.persistence.Id; import javax.persistence.Table; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.org.Cluster; -import com.cloud.org.Grouping; -import com.cloud.org.Managed.ManagedState; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.UUID; @Entity @Table(name = "cluster") @@ -192,4 +191,9 @@ public class ClusterVO implements Cluster { public void setUuid(String uuid) { this.uuid = uuid; } + + @Override + public PartitionType partitionType() { + return PartitionType.Cluster; + } } diff --git a/engine/schema/src/com/cloud/dc/DataCenterVO.java b/engine/schema/src/com/cloud/dc/DataCenterVO.java index 42ea34a4704..4ab0eada1e1 100644 --- a/engine/schema/src/com/cloud/dc/DataCenterVO.java +++ b/engine/schema/src/com/cloud/dc/DataCenterVO.java @@ -16,9 +16,10 @@ // under the License. package com.cloud.dc; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.network.Network.Provider; +import com.cloud.org.Grouping; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.Entity; @@ -30,11 +31,9 @@ import javax.persistence.Id; import javax.persistence.Table; import javax.persistence.TableGenerator; import javax.persistence.Transient; - -import com.cloud.network.Network.Provider; -import com.cloud.org.Grouping; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "data_center") @@ -454,4 +453,9 @@ public class DataCenterVO implements DataCenter { public void setIp6Dns2(String ip6Dns2) { this.ip6Dns2 = ip6Dns2; } + + @Override + public PartitionType partitionType() { + return PartitionType.Zone; + } } diff --git a/engine/schema/src/com/cloud/host/HostVO.java b/engine/schema/src/com/cloud/host/HostVO.java index c5938d65220..7fd1e710185 100644 --- a/engine/schema/src/com/cloud/host/HostVO.java +++ b/engine/schema/src/com/cloud/host/HostVO.java @@ -16,11 +16,12 @@ // under the License. 
package com.cloud.host; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.UUID; +import com.cloud.agent.api.VgpuTypesInfo; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.resource.ResourceState; +import com.cloud.storage.Storage.StoragePoolType; +import com.cloud.utils.NumbersUtil; +import com.cloud.utils.db.GenericDao; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -37,13 +38,11 @@ import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import com.cloud.agent.api.VgpuTypesInfo; -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.resource.ResourceState; -import com.cloud.storage.Storage.StoragePoolType; -import com.cloud.utils.NumbersUtil; -import com.cloud.utils.db.GenericDao; +import java.util.Date; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "host") @@ -178,6 +177,11 @@ public class HostVO implements Host { return clusterId; } + @Override + public ResourceType resourceType() { + return ResourceType.Host; + } + public void setClusterId(Long clusterId) { this.clusterId = clusterId; } @@ -713,6 +717,11 @@ public class HostVO implements Host { return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance); } + @Override + public boolean isDisabled() { + return (getResourceState() == ResourceState.Disabled); + } + public long getUpdated() { return updated; } @@ -730,4 +739,9 @@ public class HostVO implements Host { public void setUuid(String uuid) { this.uuid = uuid; } + + @Override + public PartitionType partitionType() { + return PartitionType.Host; + } } diff --git a/engine/schema/src/com/cloud/vm/VMInstanceVO.java b/engine/schema/src/com/cloud/vm/VMInstanceVO.java index d4b18d078e4..b55e030620b 100644 --- a/engine/schema/src/com/cloud/vm/VMInstanceVO.java +++ b/engine/schema/src/com/cloud/vm/VMInstanceVO.java @@ -16,11 +16,14 @@ // under the License. 
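
The partitionType() overrides added to the VO classes above all feed the new Partition contract; the following utility is an illustrative sketch (not part of the patch) of the kind of generic handling this enables, such as counting mixed infrastructure entities by partition type.

    import org.apache.cloudstack.kernel.Partition;

    import java.util.EnumMap;
    import java.util.List;
    import java.util.Map;

    public class PartitionGroupingSketch {
        // Illustrative utility: bucket a mixed list of zones, clusters and hosts
        // by the PartitionType each entity reports through partitionType().
        public static Map<Partition.PartitionType, Integer> countByType(final List<? extends Partition> partitions) {
            final Map<Partition.PartitionType, Integer> counts = new EnumMap<>(Partition.PartitionType.class);
            for (final Partition partition : partitions) {
                counts.merge(partition.partitionType(), 1, Integer::sum);
            }
            return counts;
        }
    }
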
package com.cloud.vm; -import java.security.NoSuchAlgorithmException; -import java.security.SecureRandom; -import java.util.Date; -import java.util.Map; -import java.util.UUID; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.utils.db.Encrypt; +import com.cloud.utils.db.GenericDao; +import com.cloud.utils.db.StateMachine; +import com.cloud.utils.fsm.FiniteStateObject; +import com.cloud.vm.VirtualMachine.State; +import org.apache.commons.codec.binary.Base64; +import org.apache.log4j.Logger; import javax.persistence.Column; import javax.persistence.DiscriminatorColumn; @@ -36,16 +39,11 @@ import javax.persistence.TableGenerator; import javax.persistence.Temporal; import javax.persistence.TemporalType; import javax.persistence.Transient; - -import org.apache.commons.codec.binary.Base64; -import org.apache.log4j.Logger; - -import com.cloud.hypervisor.Hypervisor.HypervisorType; -import com.cloud.utils.db.Encrypt; -import com.cloud.utils.db.GenericDao; -import com.cloud.utils.db.StateMachine; -import com.cloud.utils.fsm.FiniteStateObject; -import com.cloud.vm.VirtualMachine.State; +import java.security.NoSuchAlgorithmException; +import java.security.SecureRandom; +import java.util.Date; +import java.util.Map; +import java.util.UUID; @Entity @Table(name = "vm_instance") @@ -566,4 +564,9 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject, StateDao { + HAConfig findHAResource(long resourceId, HAResource.ResourceType resourceType); + List listHAResource(final Long resourceId, final HAResource.ResourceType resourceType); + void expireServerOwnership(long serverId); +} \ No newline at end of file diff --git a/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java b/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java new file mode 100644 index 00000000000..4d74e2e6656 --- /dev/null +++ b/engine/schema/src/org/apache/cloudstack/ha/dao/HAConfigDaoImpl.java @@ -0,0 +1,149 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
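
The DAO implementation that follows performs an optimistic, update-count guarded state change. A hedged sketch of how a service layer might use it is shown here; the helper class is hypothetical, and updateState() comes from the StateDao contract the DAO extends.

    import org.apache.cloudstack.ha.HAConfig;
    import org.apache.cloudstack.ha.HAResource;
    import org.apache.cloudstack.ha.dao.HAConfigDao;

    import javax.inject.Inject;

    public class HAConfigStateSketch {
        @Inject
        private HAConfigDao haConfigDao;

        // Hypothetical helper: look up the host's HA row and apply a single FSM
        // event; updateState() only succeeds if the row's state and update count
        // still match, so concurrent management servers cannot clobber each other.
        public boolean markSuspect(final long hostId) {
            final HAConfig config = haConfigDao.findHAResource(hostId, HAResource.ResourceType.Host);
            if (config == null || !config.isEnabled()) {
                return false;
            }
            return haConfigDao.updateState(config.getState(), HAConfig.Event.HealthCheckFailed,
                    HAConfig.HAState.Suspect, config, null);
        }
    }
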
+ +package org.apache.cloudstack.ha.dao; + +import com.cloud.utils.DateUtil; +import com.cloud.utils.db.Attribute; +import com.cloud.utils.db.DB; +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; +import com.cloud.utils.db.Transaction; +import com.cloud.utils.db.TransactionCallbackNoReturn; +import com.cloud.utils.db.TransactionLegacy; +import com.cloud.utils.db.TransactionStatus; +import com.cloud.utils.db.UpdateBuilder; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAConfigVO; +import org.apache.cloudstack.ha.HAResource; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; + +import java.sql.PreparedStatement; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.List; + +@DB +@Component +public class HAConfigDaoImpl extends GenericDaoBase implements HAConfigDao { + private static final Logger LOG = Logger.getLogger(HAConfigDaoImpl.class); + + private static final String EXPIRE_OWNERSHIP = "UPDATE ha_config set mgmt_server_id=NULL where mgmt_server_id=?"; + + private SearchBuilder ResourceSearch; + private SearchBuilder StateUpdateSearch; + + private Attribute HAStateAttr; + private Attribute MsIdAttr; + private Attribute UpdateTimeAttr; + + public HAConfigDaoImpl() { + super(); + + ResourceSearch = createSearchBuilder(); + ResourceSearch.and("resourceId", ResourceSearch.entity().getResourceId(), SearchCriteria.Op.EQ); + ResourceSearch.and("resourceType", ResourceSearch.entity().getResourceType(), SearchCriteria.Op.EQ); + ResourceSearch.done(); + + StateUpdateSearch = createSearchBuilder(); + StateUpdateSearch.and("id", StateUpdateSearch.entity().getId(), SearchCriteria.Op.EQ); + StateUpdateSearch.and("haState", StateUpdateSearch.entity().getHaState(), SearchCriteria.Op.EQ); + StateUpdateSearch.and("update", StateUpdateSearch.entity().getUpdateCount(), SearchCriteria.Op.EQ); + StateUpdateSearch.done(); + + HAStateAttr = _allAttributes.get("haState"); + MsIdAttr = _allAttributes.get("managementServerId"); + UpdateTimeAttr = _allAttributes.get("updateTime"); + assert (HAStateAttr != null && MsIdAttr != null && UpdateTimeAttr != null) : "Couldn't find one of these attributes"; + } + + @Override + public boolean updateState(HAConfig.HAState currentState, HAConfig.Event event, HAConfig.HAState nextState, HAConfig vo, Object data) { + HAConfigVO haConfig = (HAConfigVO) vo; + if (haConfig == null) { + if (LOG.isTraceEnabled()) { + LOG.trace("Invalid ha config view object provided"); + } + return false; + } + + Long newManagementServerId = event.getServerId(); + if (currentState == nextState && (haConfig.getManagementServerId() != null && haConfig.getManagementServerId().equals(newManagementServerId))) { + return false; + } + + if (event == HAConfig.Event.Disabled) { + newManagementServerId = null; + } + + SearchCriteria sc = StateUpdateSearch.create(); + sc.setParameters("id", haConfig.getId()); + sc.setParameters("haState", currentState); + sc.setParameters("update", haConfig.getUpdateCount()); + + haConfig.incrUpdateCount(); + UpdateBuilder ub = getUpdateBuilder(haConfig); + ub.set(haConfig, HAStateAttr, nextState); + ub.set(haConfig, UpdateTimeAttr, DateUtil.currentGMTTime()); + ub.set(haConfig, MsIdAttr, newManagementServerId); + + int result = update(ub, sc, null); + if (LOG.isTraceEnabled() && result <= 0) { + LOG.trace(String.format("Failed to update HA state from:%s to:%s due to event:%s for the ha_config id:%d", currentState, 
nextState, event, haConfig.getId())); + } + return result > 0; + } + + @Override + public HAConfig findHAResource(final long resourceId, final HAResource.ResourceType resourceType) { + final SearchCriteria sc = ResourceSearch.create(); + sc.setParameters("resourceId", resourceId); + sc.setParameters("resourceType", resourceType); + return findOneBy(sc); + } + + @Override + public List listHAResource(final Long resourceId, final HAResource.ResourceType resourceType) { + final SearchCriteria sc = ResourceSearch.create(); + if (resourceId != null && resourceId > 0L) { + sc.setParameters("resourceId", resourceId); + } + if (resourceType != null) { + sc.setParameters("resourceType", resourceType); + } + return new ArrayList(listBy(sc)); + } + + @Override + public void expireServerOwnership(final long serverId) { + Transaction.execute(new TransactionCallbackNoReturn() { + @Override + public void doInTransactionWithoutResult(TransactionStatus status) { + TransactionLegacy txn = TransactionLegacy.currentTxn(); + try (final PreparedStatement pstmt = txn.prepareAutoCloseStatement(EXPIRE_OWNERSHIP);) { + pstmt.setLong(1, serverId); + pstmt.executeUpdate(); + } catch (SQLException e) { + txn.rollback(); + LOG.warn("Failed to expire HA ownership of management server id: " + serverId); + } + } + }); + } +} diff --git a/plugins/hypervisors/kvm/resources/META-INF/cloudstack/kvm-compute/spring-kvm-compute-context.xml b/plugins/hypervisors/kvm/resources/META-INF/cloudstack/kvm-compute/spring-kvm-compute-context.xml index 3c51a233275..9bcfdd9c306 100644 --- a/plugins/hypervisors/kvm/resources/META-INF/cloudstack/kvm-compute/spring-kvm-compute-context.xml +++ b/plugins/hypervisors/kvm/resources/META-INF/cloudstack/kvm-compute/spring-kvm-compute-context.xml @@ -30,5 +30,12 @@ + + + + + + + diff --git a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java index 8337a7b7646..a6cddc7dfab 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java +++ b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java @@ -29,7 +29,7 @@ import com.cloud.hypervisor.Hypervisor; import com.cloud.resource.ResourceManager; import com.cloud.storage.Storage.StoragePoolType; import com.cloud.utils.component.AdapterBase; - +import org.apache.cloudstack.ha.HAManager; import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; import org.apache.log4j.Logger; @@ -40,17 +40,23 @@ import java.util.List; public class KVMInvestigator extends AdapterBase implements Investigator { private final static Logger s_logger = Logger.getLogger(KVMInvestigator.class); @Inject - HostDao _hostDao; + private HostDao _hostDao; @Inject - AgentManager _agentMgr; + private AgentManager _agentMgr; @Inject - ResourceManager _resourceMgr; + private ResourceManager _resourceMgr; @Inject - PrimaryDataStoreDao _storagePoolDao; + private PrimaryDataStoreDao _storagePoolDao; + @Inject + private HAManager haManager; @Override public boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) throws UnknownVM { + if (haManager.isHAEligible(host)) { + return haManager.isVMAliveOnHost(host); + } Status status = isAgentAlive(host); + s_logger.debug("HA: HOST is ineligible legacy state " + status + " for host " + host.getId()); if (status == null) { throw new UnknownVM(); } @@ -67,6 +73,10 @@ public class KVMInvestigator extends AdapterBase implements Investigator { return null; } + if 
(haManager.isHAEligible(agent)) { + return haManager.getHostStatus(agent); + } + List clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId()); boolean hasNfs = false; for (StoragePoolVO pool : clusterPools) { @@ -123,6 +133,7 @@ public class KVMInvestigator extends AdapterBase implements Investigator { if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { hostStatus = Status.Down; } + s_logger.debug("HA: HOST is ineligible legacy state " + hostStatus + " for host " + agent.getId()); return hostStatus; } } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java index b829f78f625..be5ab396d19 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHABase.java @@ -179,4 +179,9 @@ public class KVMHABase { return result; } + + public Boolean checkingHB() { + // TODO Auto-generated method stub + return null; + } } diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java index 565673ec7bc..c99670ceaff 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAChecker.java @@ -40,10 +40,10 @@ public class KVMHAChecker extends KVMHABase implements Callable { * True means heartbeaing is on going, or we can't get it's status. False * means heartbeating is stopped definitely */ - private Boolean checkingHB() { + @Override + public Boolean checkingHB() { List results = new ArrayList(); for (NfsStoragePool pool : _pools) { - Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger); cmd.add("-i", pool._poolIp); cmd.add("-p", pool._poolMountSourcePath); @@ -53,9 +53,9 @@ public class KVMHAChecker extends KVMHABase implements Callable { cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000)); OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); String result = cmd.execute(parser); - s_logger.debug("pool: " + pool._poolIp); - s_logger.debug("reture: " + result); - s_logger.debug("parser: " + parser.getLine()); + s_logger.debug("KVMHAChecker pool: " + pool._poolIp); + s_logger.debug("KVMHAChecker reture: " + result); + s_logger.debug("KVMHAChecker parser: " + parser.getLine()); if (result == null && parser.getLine().contains("> DEAD <")) { s_logger.debug("read heartbeat failed: "); results.add(false); diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java index 49aa99cea68..0cebb4c9b00 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAMonitor.java @@ -16,22 +16,20 @@ // under the License. 
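
The KVMInvestigator changes above prefer the new HA framework's view of a host whenever it is HA-eligible and only fall back to the legacy NFS-heartbeat investigation otherwise. A hedged illustration of that decision follows; the helper class is hypothetical, and HAManager's method signatures are inferred from the calls shown above.

    import com.cloud.host.Host;
    import com.cloud.host.Status;
    import org.apache.cloudstack.ha.HAManager;

    public class InvestigatorDelegationSketch {
        // Illustrative decision mirroring the KVMInvestigator change: when the HA
        // framework manages this host, trust its status; a null return tells the
        // caller to fall back to the legacy NFS heartbeat investigation.
        public static Status haStatusOrNull(final HAManager haManager, final Host host) {
            if (haManager.isHAEligible(host)) {
                return haManager.getHostStatus(host);
            }
            return null;
        }
    }
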
package com.cloud.hypervisor.kvm.resource; -import java.util.ArrayList; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.concurrent.ConcurrentHashMap; - +import com.cloud.utils.script.Script; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.log4j.Logger; import org.libvirt.Connect; import org.libvirt.LibvirtException; import org.libvirt.StoragePool; import org.libvirt.StoragePoolInfo.StoragePoolState; -import org.apache.cloudstack.managed.context.ManagedContextRunnable; - -import com.cloud.utils.script.Script; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; public class KVMHAMonitor extends KVMHABase implements Runnable { private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class); @@ -73,6 +71,12 @@ public class KVMHAMonitor extends KVMHABase implements Runnable { } } + public NfsStoragePool getStoragePool(String uuid) { + synchronized (_storagePool) { + return _storagePool.get(uuid); + } + } + private class Monitor extends ManagedContextRunnable { @Override diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java new file mode 100644 index 00000000000..f14d0a25b7f --- /dev/null +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/KVMHAVMActivityChecker.java @@ -0,0 +1,70 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+package com.cloud.hypervisor.kvm.resource; + +import com.cloud.utils.script.OutputInterpreter; +import com.cloud.utils.script.Script; +import org.apache.log4j.Logger; +import org.joda.time.Duration; + +import java.util.concurrent.Callable; + +public class KVMHAVMActivityChecker extends KVMHABase implements Callable { + private static final Logger LOG = Logger.getLogger(KVMHAVMActivityChecker.class); + + final private NfsStoragePool nfsStoragePool; + final private String hostIP; + final private String volumeUuidList; + final private String vmActivityCheckPath; + final private Duration activityScriptTimeout = Duration.standardSeconds(3600L); + final private long suspectTimeInSeconds; + + public KVMHAVMActivityChecker(final NfsStoragePool pool, final String host, final String volumeUUIDListString, String vmActivityCheckPath, final long suspectTime) { + this.nfsStoragePool = pool; + this.hostIP = host; + this.volumeUuidList = volumeUUIDListString; + this.vmActivityCheckPath = vmActivityCheckPath; + this.suspectTimeInSeconds = suspectTime; + } + + @Override + public Boolean checkingHB() { + Script cmd = new Script(vmActivityCheckPath, activityScriptTimeout.getStandardSeconds(), LOG); + cmd.add("-i", nfsStoragePool._poolIp); + cmd.add("-p", nfsStoragePool._poolMountSourcePath); + cmd.add("-m", nfsStoragePool._mountDestPath); + cmd.add("-h", hostIP); + cmd.add("-u", volumeUuidList); + cmd.add("-t", String.valueOf(String.valueOf(System.currentTimeMillis() / 1000))); + cmd.add("-d", String.valueOf(suspectTimeInSeconds)); + OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); + String result = cmd.execute(parser); + LOG.debug("KVMHAVMActivityChecker pool: " + nfsStoragePool._poolIp); + LOG.debug("KVMHAVMActivityChecker result: " + result); + LOG.debug("KVMHAVMActivityChecker parser: " + parser.getLine()); + if (result == null && parser.getLine().contains("DEAD")) { + return false; + } else { + return true; + } + } + + @Override + public Boolean call() throws Exception { + return checkingHB(); + } +} diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 2440e624ffa..4b5811b5b92 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -201,6 +201,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv private String _resizeVolumePath; private String _createTmplPath; private String _heartBeatPath; + private String _vmActivityCheckPath; private String _securityGroupPath; private String _ovsPvlanDhcpHostPath; private String _ovsPvlanVmPath; @@ -447,6 +448,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv return _guestBridgeName; } + public String getVmActivityCheckPath() { + return _vmActivityCheckPath; + } + public String getOvsPvlanDhcpHostPath() { return _ovsPvlanDhcpHostPath; } @@ -687,6 +692,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv throw new ConfigurationException("Unable to find the resizevolume.sh"); } + _vmActivityCheckPath = Script.findScript(kvmScriptsDir, "kvmvmactivity.sh"); + if (_vmActivityCheckPath == null) { + throw new ConfigurationException("Unable to find kvmvmactivity.sh"); + } + _createTmplPath = Script.findScript(storageScriptsDir, "createtmplt.sh"); 
if (_createTmplPath == null) { throw new ConfigurationException("Unable to find the createtmplt.sh"); diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java index bc648f2f446..651cdc9e888 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtCheckOnHostCommandWrapper.java @@ -53,14 +53,14 @@ public final class LibvirtCheckOnHostCommandWrapper extends CommandWrapper { + + @Override + public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final LibvirtComputingResource libvirtComputingResource) { + final ExecutorService executors = Executors.newSingleThreadExecutor(); + final KVMHAMonitor monitor = libvirtComputingResource.getMonitor(); + final StorageFilerTO pool = command.getPool(); + if (Storage.StoragePoolType.NetworkFilesystem == pool.getType()){ + final NfsStoragePool nfspool = monitor.getStoragePool(pool.getUuid()); + final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost().getPrivateNetwork().getIp(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds()); + final Future future = executors.submit(ha); + try { + final Boolean result = future.get(); + if (result) { + return new Answer(command, false, "VMHA disk activity detected ..."); + } else { + return new Answer(command); + } + } catch (InterruptedException e) { + return new Answer(command, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: InterruptedException"); + } catch (ExecutionException e) { + return new Answer(command, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: ExecutionException"); + } + } + return new Answer(command, false, "Unsupported Storage"); + } +} diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java new file mode 100644 index 00000000000..59ea720328f --- /dev/null +++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAConfig.java @@ -0,0 +1,56 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
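
On the management-server side, the activity-check wrapper above is reached by sending a CheckVMActivityOnStoragePoolCommand to a healthy neighbour host. A hedged sketch of such a probe follows; the helper class is hypothetical and it assumes AgentManager.easySend() is used to deliver the command.

    import com.cloud.agent.AgentManager;
    import com.cloud.agent.api.Answer;
    import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand;
    import com.cloud.host.Host;
    import com.cloud.storage.StoragePool;
    import com.cloud.storage.Volume;
    import org.joda.time.DateTime;

    import javax.inject.Inject;
    import java.util.List;

    public class VmActivityProbeSketch {
        @Inject
        private AgentManager agentManager;

        // Hypothetical probe: ask a neighbour sharing the NFS pool whether the
        // suspect host's volumes have seen disk activity since suspectTime. Per the
        // wrapper above, a failed Answer carrying "VMHA disk activity detected ..."
        // means activity was found, while a plain successful Answer means the disks
        // were idle (an unsupported pool type also comes back as a failure).
        public boolean hasDiskActivity(final Host neighbour, final StoragePool pool,
                                       final List<Volume> volumes, final DateTime suspectTime) {
            final CheckVMActivityOnStoragePoolCommand cmd =
                    new CheckVMActivityOnStoragePoolCommand(neighbour, pool, volumes, suspectTime);
            final Answer answer = agentManager.easySend(neighbour.getId(), cmd);
            return answer != null && !answer.getResult();
        }
    }
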
+
+package org.apache.cloudstack.kvm.ha;
+
+import org.apache.cloudstack.framework.config.ConfigKey;
+
+public class KVMHAConfig {
+
+    public static final ConfigKey<Long> KvmHAHealthCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.health.check.timeout", "10",
+            "The maximum length of time, in seconds, expected for a health check to complete.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHAActivityCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.timeout", "60",
+            "The maximum length of time, in seconds, expected for an activity check to complete.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHAActivityCheckInterval = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.interval", "60",
+            "The interval, in seconds, between activity checks.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHAActivityCheckMaxAttempts = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.max.attempts", "10",
+            "The maximum number of activity check attempts to perform before deciding to recover or degrade a resource.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Double> KvmHAActivityCheckFailureThreshold = new ConfigKey<>("Advanced", Double.class, "kvm.ha.activity.check.failure.ratio", "0.7",
+            "The activity check failure threshold ratio. This is used with the activity check maximum attempts for deciding to recover or degrade a resource. For most environments, please keep this value above 0.5.",
+            true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHADegradedMaxPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.degraded.max.period", "300",
+            "The maximum length of time, in seconds, a resource can be in degraded state where only health checks are performed.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHARecoverTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.timeout", "60",
+            "The maximum length of time, in seconds, expected for a recovery operation to complete.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHARecoverWaitPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.wait.period", "600",
+            "The maximum length of time, in seconds, to wait for a resource to recover.", true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHARecoverAttemptThreshold = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.failure.threshold", "1",
+            "The maximum recovery attempts to be made for a resource, after which the resource is fenced. The recovery counter resets when a health check passes for a resource.",
+            true, ConfigKey.Scope.Cluster);
+
+    public static final ConfigKey<Long> KvmHAFenceTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.fence.timeout", "60",
+            "The maximum length of time, in seconds, expected for a fence operation to complete.", true, ConfigKey.Scope.Cluster);
+
+}
diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java
new file mode 100644
index 00000000000..5399fd23a1c
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHAProvider.java
@@ -0,0 +1,157 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.cloudstack.kvm.ha; + +import com.cloud.host.Host; +import com.cloud.hypervisor.Hypervisor; + +import org.apache.cloudstack.api.response.OutOfBandManagementResponse; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; +import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider; +import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement.PowerOperation; +import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementService; +import org.apache.log4j.Logger; +import org.joda.time.DateTime; + +import javax.inject.Inject; +import java.security.InvalidParameterException; + +public final class KVMHAProvider extends HAAbstractHostProvider implements HAProvider, Configurable { + private final static Logger LOG = Logger.getLogger(KVMHAProvider.class); + + @Inject + protected KVMHostActivityChecker hostActivityChecker; + @Inject + protected OutOfBandManagementService outOfBandManagementService; + + @Override + public boolean isEligible(final Host host) { + if (outOfBandManagementService.isOutOfBandManagementEnabled(host)){ + return !isInMaintenanceMode(host) && !isDisabled(host) && + hostActivityChecker.getNeighbors(host).length > 0 && + (Hypervisor.HypervisorType.KVM.equals(host.getHypervisorType()) || + Hypervisor.HypervisorType.LXC.equals(host.getHypervisorType())); + } + return false; + } + + @Override + public boolean isHealthy(final Host r) throws HACheckerException { + return hostActivityChecker.isHealthy(r); + } + + @Override + public boolean hasActivity(final Host r, final DateTime suspectTime) throws HACheckerException { + return hostActivityChecker.isActive(r, suspectTime); + } + + @Override + public boolean recover(Host r) throws HARecoveryException { + try { + if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){ + final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.RESET, null); + return resp.getSuccess(); + } else { + LOG.warn("OOBM recover operation failed for the host " + r.getName()); + return false; + } + } catch (Exception e){ + LOG.warn("OOBM service is not configured or enabled for this host " + r.getName() + " error is " + e.getMessage()); + throw new HARecoveryException(" OOBM service is not configured or enabled for this host " + r.getName(), e); + } + } + + @Override + public boolean fence(Host r) throws HAFenceException { + try { + if (outOfBandManagementService.isOutOfBandManagementEnabled(r)){ + final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.OFF, null); + return 
resp.getSuccess(); + } else { + LOG.warn("OOBM fence operation failed for this host " + r.getName()); + return false; + } + } catch (Exception e){ + LOG.warn("OOBM service is not configured or enabled for this host " + r.getName() + " error is " + e.getMessage()); + throw new HAFenceException("OOBM service is not configured or enabled for this host " + r.getName() , e); + } + } + + @Override + public HAResource.ResourceSubType resourceSubType() { + return HAResource.ResourceSubType.KVM; + } + + @Override + public Object getConfigValue(final HAProviderConfig name, final Host host) { + final Long clusterId = host.getClusterId(); + switch (name) { + case HealthCheckTimeout: + return KVMHAConfig.KvmHAHealthCheckTimeout.valueIn(clusterId); + case ActivityCheckTimeout: + return KVMHAConfig.KvmHAActivityCheckTimeout.valueIn(clusterId); + case MaxActivityCheckInterval: + return KVMHAConfig.KvmHAActivityCheckInterval.valueIn(clusterId); + case MaxActivityChecks: + return KVMHAConfig.KvmHAActivityCheckMaxAttempts.valueIn(clusterId); + case ActivityCheckFailureRatio: + return KVMHAConfig.KvmHAActivityCheckFailureThreshold.valueIn(clusterId); + case RecoveryWaitTimeout: + return KVMHAConfig.KvmHARecoverWaitPeriod.valueIn(clusterId); + case RecoveryTimeout: + return KVMHAConfig.KvmHARecoverTimeout.valueIn(clusterId); + case FenceTimeout: + return KVMHAConfig.KvmHAFenceTimeout.valueIn(clusterId); + case MaxRecoveryAttempts: + return KVMHAConfig.KvmHARecoverAttemptThreshold.valueIn(clusterId); + case MaxDegradedWaitTimeout: + return KVMHAConfig.KvmHADegradedMaxPeriod.valueIn(clusterId); + default: + throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString()); + } + } + + @Override + public String getConfigComponentName() { + return KVMHAConfig.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[] { + KVMHAConfig.KvmHAHealthCheckTimeout, + KVMHAConfig.KvmHAActivityCheckTimeout, + KVMHAConfig.KvmHARecoverTimeout, + KVMHAConfig.KvmHAFenceTimeout, + KVMHAConfig.KvmHAActivityCheckInterval, + KVMHAConfig.KvmHAActivityCheckMaxAttempts, + KVMHAConfig.KvmHAActivityCheckFailureThreshold, + KVMHAConfig.KvmHADegradedMaxPeriod, + KVMHAConfig.KvmHARecoverWaitPeriod, + KVMHAConfig.KvmHARecoverAttemptThreshold + }; + } +} diff --git a/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java new file mode 100644 index 00000000000..060b484fec7 --- /dev/null +++ b/plugins/hypervisors/kvm/src/org/apache/cloudstack/kvm/ha/KVMHostActivityChecker.java @@ -0,0 +1,205 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
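+//
+// KVMHostActivityChecker probes a KVM/LXC host in two ways: isHealthy() sends a
+// CheckOnHostCommand to the host and, when the host itself does not report Up, to Up
+// neighbours in the same cluster; isActive() groups the volumes of VMs assigned to the
+// host by storage pool and issues a CheckVMActivityOnStoragePoolCommand for each pool.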
+ +package org.apache.cloudstack.kvm.ha; + +import com.cloud.agent.AgentManager; +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.CheckOnHostCommand; +import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand; +import com.cloud.exception.StorageUnavailableException; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.resource.ResourceManager; +import com.cloud.storage.StorageManager; +import com.cloud.storage.StoragePool; +import com.cloud.storage.Volume; +import com.cloud.storage.VolumeVO; +import com.cloud.storage.dao.VolumeDao; +import com.cloud.utils.component.AdapterBase; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.ha.provider.ActivityCheckerInterface; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HealthCheckerInterface; +import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; +import org.apache.commons.lang.ArrayUtils; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import java.util.ArrayList; +import org.joda.time.DateTime; +import java.util.HashMap; +import java.util.List; + +public class KVMHostActivityChecker extends AdapterBase implements ActivityCheckerInterface, HealthCheckerInterface { + private final static Logger LOG = Logger.getLogger(KVMHostActivityChecker.class); + + @Inject + private VolumeDao volumeDao; + @Inject + private VMInstanceDao vmInstanceDao; + @Inject + private AgentManager agentMgr; + @Inject + private PrimaryDataStoreDao storagePool; + @Inject + private StorageManager storageManager; + @Inject + private ResourceManager resourceManager; + + @Override + public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException { + try { + return isVMActivtyOnHost(r, suspectTime); + } + catch (StorageUnavailableException e){ + throw new HACheckerException("Storage is unavailable to do the check, mostly host is not reachable ", e); + } + catch (Exception e){ + throw new HACheckerException("Operation timed out, mostly host is not reachable ", e); + } + } + + @Override + public boolean isHealthy(Host r) { + return isAgentActive(r); + } + + private boolean isAgentActive(Host agent) { + if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) { + throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType()); + } + Status hostStatus = Status.Unknown; + Status neighbourStatus = Status.Unknown; + final CheckOnHostCommand cmd = new CheckOnHostCommand(agent); + try { + Answer answer = agentMgr.easySend(agent.getId(), cmd); + if (answer != null) { + hostStatus = answer.getResult() ? 
Status.Down : Status.Up; + if ( hostStatus == Status.Up ){ + return true; + } + } + else { + hostStatus = Status.Disconnected; + } + } catch (Exception e) { + LOG.warn("Failed to send command to host: " + agent.getId()); + } + + List neighbors = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); + for (HostVO neighbor : neighbors) { + if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) { + continue; + } + if (LOG.isTraceEnabled()){ + LOG.trace("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId()); + } + try { + Answer answer = agentMgr.easySend(neighbor.getId(), cmd); + if (answer != null) { + neighbourStatus = answer.getResult() ? Status.Down : Status.Up; + if (LOG.isTraceEnabled()){ + LOG.trace("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId()); + } + if (neighbourStatus == Status.Up) { + break; + } + } + } catch (Exception e) { + if (LOG.isTraceEnabled()) { + LOG.trace("Failed to send command to host: " + neighbor.getId()); + } + } + } + if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { + hostStatus = Status.Disconnected; + } + if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { + hostStatus = Status.Down; + } + + if (LOG.isTraceEnabled()){ + LOG.trace("Resource state = " + hostStatus.name()); + } + return hostStatus == Status.Up; + } + + private boolean isVMActivtyOnHost(Host agent, DateTime suspectTime) throws StorageUnavailableException { + if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) { + throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType()); + } + boolean activityStatus = true; + HashMap> poolVolMap = getVolumeUuidOnHost(agent); + for (StoragePool pool : poolVolMap.keySet()) { + //for each storage pool find activity + List volume_list = poolVolMap.get(pool); + final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime); + //send the command to appropriate storage pool + Answer answer = storageManager.sendToPool(pool, getNeighbors(agent), cmd); + if (answer != null) { + activityStatus = ! 
answer.getResult(); + } else { + throw new IllegalStateException("Did not get a valid response for VM activity check for host " + agent.getId()); + } + } + if (LOG.isDebugEnabled()){ + LOG.debug("Resource active = " + activityStatus); + } + return activityStatus; + } + + private HashMap> getVolumeUuidOnHost(Host agent) { + List vm_list = vmInstanceDao.listByHostId(agent.getId()); + List volume_list = new ArrayList(); + for (VirtualMachine vm : vm_list) { + List vm_volume_list = volumeDao.findByInstance(vm.getId()); + volume_list.addAll(vm_volume_list); + } + + HashMap> poolVolMap = new HashMap>(); + for (Volume vol : volume_list) { + StoragePool sp = storagePool.findById(vol.getPoolId()); + if (!poolVolMap.containsKey(sp)) { + List list = new ArrayList(); + list.add(vol); + + poolVolMap.put(sp, list); + } else { + poolVolMap.get(sp).add(vol); + } + } + return poolVolMap; + } + + public long[] getNeighbors(Host agent) { + List neighbors = new ArrayList(); + List cluster_hosts = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); + for (HostVO host : cluster_hosts) { + if (host.getId() == agent.getId() || (host.getHypervisorType() != Hypervisor.HypervisorType.KVM && host.getHypervisorType() != Hypervisor.HypervisorType.LXC)) { + continue; + } + neighbors.add(host.getId()); + } + return ArrayUtils.toPrimitive(neighbors.toArray(new Long[neighbors.size()])); + } + +} diff --git a/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java b/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java new file mode 100644 index 00000000000..26b7e6ae206 --- /dev/null +++ b/plugins/hypervisors/kvm/test/org/apache/cloudstack/kvm/ha/KVMHostHATest.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ +package org.apache.cloudstack.kvm.ha; + +import com.cloud.exception.StorageUnavailableException; +import com.cloud.host.Host; +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.junit.Before; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.mockito.runners.MockitoJUnitRunner; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.mockito.Mockito.when; + +import org.joda.time.DateTime; + +@RunWith(MockitoJUnitRunner.class) +public class KVMHostHATest { + + @Mock + private Host host; + @Mock + private KVMHostActivityChecker kvmHostActivityChecker; + private KVMHAProvider kvmHAProvider; + + @Before + public void setup() { + MockitoAnnotations.initMocks(this); + kvmHAProvider = new KVMHAProvider(); + kvmHAProvider.hostActivityChecker = kvmHostActivityChecker; + } + + @Test + public void testHostActivityForHealthyHost() throws HACheckerException, StorageUnavailableException { + when(host.getHypervisorType()).thenReturn(HypervisorType.KVM); + when(kvmHostActivityChecker.isHealthy(host)).thenReturn(true); + assertTrue(kvmHAProvider.isHealthy(host)); + } + + @Test + public void testHostActivityForUnHealthyHost() throws HACheckerException, StorageUnavailableException { + when(host.getHypervisorType()).thenReturn(HypervisorType.KVM); + when(kvmHostActivityChecker.isHealthy(host)).thenReturn(false); + assertFalse(kvmHAProvider.isHealthy(host)); + } + + @Test + public void testHostActivityForActiveHost() throws HACheckerException, StorageUnavailableException { + when(host.getHypervisorType()).thenReturn(HypervisorType.KVM); + DateTime dt = new DateTime(); + when(kvmHostActivityChecker.isActive(host, dt)).thenReturn(true); + assertTrue(kvmHAProvider.hasActivity(host, dt)); + } + + @Test + public void testHostActivityForDownHost() throws HACheckerException, StorageUnavailableException { + when(host.getHypervisorType()).thenReturn(HypervisorType.KVM); + DateTime dt = new DateTime(); + when(kvmHostActivityChecker.isActive(host, dt)).thenReturn(false); + assertFalse(kvmHAProvider.hasActivity(host, dt)); + } + +} diff --git a/plugins/hypervisors/simulator/pom.xml b/plugins/hypervisors/simulator/pom.xml index 34891e4ee9d..a49679e082d 100644 --- a/plugins/hypervisors/simulator/pom.xml +++ b/plugins/hypervisors/simulator/pom.xml @@ -63,5 +63,10 @@ cloud-engine-storage-snapshot ${project.version} + + com.google.guava + guava + ${cs.guava.version} + diff --git a/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml b/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml index c95e4bbba26..5b779f7a398 100644 --- a/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml +++ b/plugins/hypervisors/simulator/resources/META-INF/cloudstack/simulator-compute/spring-simulator-compute-context.xml @@ -36,4 +36,8 @@ + + + + diff --git a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java index b20bd3d8034..ae06a12ad70 100644 --- a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java +++ b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java @@ 
-112,6 +112,8 @@ import com.cloud.agent.api.storage.ListVolumeCommand; import com.cloud.agent.api.storage.PrimaryStorageDownloadCommand; import com.cloud.api.commands.CleanupSimulatorMockCmd; import com.cloud.api.commands.ConfigureSimulatorCmd; +import com.cloud.api.commands.ConfigureSimulatorHAProviderState; +import com.cloud.api.commands.ListSimulatorHAStateTransitions; import com.cloud.api.commands.QuerySimulatorMockCmd; import com.cloud.resource.SimulatorStorageProcessor; import com.cloud.serializer.GsonHelper; @@ -193,6 +195,8 @@ public class SimulatorManagerImpl extends ManagerBase implements SimulatorManage cmdList.add(ConfigureSimulatorCmd.class); cmdList.add(QuerySimulatorMockCmd.class); cmdList.add(CleanupSimulatorMockCmd.class); + cmdList.add(ConfigureSimulatorHAProviderState.class); + cmdList.add(ListSimulatorHAStateTransitions.class); return cmdList; } diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java new file mode 100644 index 00000000000..1d68a184a5a --- /dev/null +++ b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ConfigureSimulatorHAProviderState.java @@ -0,0 +1,120 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
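+//
+// Admin-only simulator API (since 4.11) used by the HA tests: it registers a
+// SimulatorHAState (healthy, activity, recover and fence flags) for the given host with
+// the SimulatorHAProvider, so a test can drive the host through the HA state machine.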
+
+package com.cloud.api.commands;
+
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.host.Host;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiArgValidator;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.ApiErrorCode;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.SuccessResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.cloudstack.ha.HAManager;
+import org.apache.cloudstack.ha.SimulatorHAProvider;
+import org.apache.cloudstack.ha.SimulatorHAState;
+
+import javax.inject.Inject;
+
+@APICommand(name = ConfigureSimulatorHAProviderState.APINAME,
+        description="configures simulator HA provider state for a host for probing and testing",
+        responseObject=SuccessResponse.class,
+        since = "4.11", authorized = {RoleType.Admin})
+public final class ConfigureSimulatorHAProviderState extends BaseCmd {
+    public static final String APINAME = "configureSimulatorHAProviderState";
+
+    @Inject
+    private HAManager haManager;
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+
+    @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class,
+            description = "ID of the host whose simulated HA provider state is configured", required = true, validations = {ApiArgValidator.PositiveNumber})
+    private Long hostId;
+
+    @Parameter(name = ApiConstants.HEALTH, type = CommandType.BOOLEAN,
+            description = "Set to true if the HA provider for the simulator host should be healthy",
+            required = true)
+    private Boolean healthy;
+
+    @Parameter(name = ApiConstants.ACTIVITY, type = CommandType.BOOLEAN,
+            description = "Set to true if the HA provider for the simulator host should have activity",
+            required = true)
+    private Boolean activity;
+
+    @Parameter(name = ApiConstants.RECOVER, type = CommandType.BOOLEAN,
+            description = "Set to true if the HA provider for the simulator host should be recoverable",
+            required = true)
+    private Boolean recovery;
+
+    @Parameter(name = ApiConstants.FENCE, type = CommandType.BOOLEAN,
+            description = "Set to true if the HA provider for the simulator host should be fence-able",
+            required = true)
+    private Boolean fenceable;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    public Long getHostId() {
+        return hostId;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        final Host host = _resourceService.getHost(getHostId());
+        if (host == null) {
+            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
+        }
+        final SimulatorHAState
haState = new SimulatorHAState(healthy, activity, recovery, fenceable); + final SimulatorHAProvider simulatorHAProvider = (SimulatorHAProvider) haManager.getHAProvider(SimulatorHAProvider.class.getSimpleName().toLowerCase()); + if (simulatorHAProvider != null) { + simulatorHAProvider.setHAStateForHost(host.getId(), haState); + } + final SuccessResponse response = new SuccessResponse(); + response.setSuccess(simulatorHAProvider != null); + response.setResponseName(getCommandName()); + response.setObjectName("simulatorhaprovider"); + setResponseObject(response); + } + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return CallContext.current().getCallingAccountId(); + } +} diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java new file mode 100644 index 00000000000..52368747523 --- /dev/null +++ b/plugins/hypervisors/simulator/src/com/cloud/api/commands/ListSimulatorHAStateTransitions.java @@ -0,0 +1,104 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
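+//
+// Companion admin API that returns the recent HA state transitions recorded by the
+// SimulatorHAProvider for a host, allowing tests to assert on the observed HA events.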
+ +package com.cloud.api.commands; + +import com.cloud.api.response.SimulatorHAStateResponse; +import com.cloud.exception.ConcurrentOperationException; +import com.cloud.exception.InsufficientCapacityException; +import com.cloud.exception.NetworkRuleConflictException; +import com.cloud.exception.ResourceAllocationException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.host.Host; +import org.apache.cloudstack.acl.RoleType; +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiArgValidator; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.BaseListCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.api.response.ListResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.SimulatorHAProvider; + +import javax.inject.Inject; +import java.util.ArrayList; +import java.util.List; + +@APICommand(name = ListSimulatorHAStateTransitions.APINAME, + description="list recent simulator HA state transitions for a host for probing and testing", + responseObject=SimulatorHAStateResponse.class, + since = "4.11", authorized = {RoleType.Admin}) +public final class ListSimulatorHAStateTransitions extends BaseListCmd { + public static final String APINAME = "listSimulatorHAStateTransitions"; + + @Inject + private HAManager haManager; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class, + description = "List by host ID", required = true, validations = {ApiArgValidator.PositiveNumber}) + private Long hostId; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getHostId() { + return hostId; + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException { + final Host host = _resourceService.getHost(getHostId()); + if (host == null) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId()); + } + + final SimulatorHAProvider simulatorHAProvider = (SimulatorHAProvider) haManager.getHAProvider(SimulatorHAProvider.class.getSimpleName().toLowerCase()); + List recentStates = new ArrayList<>(); + if (simulatorHAProvider != null) { + recentStates = simulatorHAProvider.listHAStateTransitions(host.getId()); + } + final ListResponse response = new ListResponse<>(); + response.setResponses(recentStates); + response.setResponseName(getCommandName()); + response.setObjectName("simulatorhastatetransition"); + setResponseObject(response); + } + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + return 
CallContext.current().getCallingAccountId(); + } +} diff --git a/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java b/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java new file mode 100644 index 00000000000..47481ad605f --- /dev/null +++ b/plugins/hypervisors/simulator/src/com/cloud/api/response/SimulatorHAStateResponse.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.api.response; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; +import org.apache.cloudstack.ha.HAConfig; + +public class SimulatorHAStateResponse extends BaseResponse { + @SerializedName(ApiConstants.HA_STATE) @Param(description="the ha state") + private String haState; + + @SerializedName("prevhastate") @Param(description="the previous ha state") + private String previousHaState; + + @SerializedName("event") @Param(description="the event that caused state transition") + private String haEvent; + + @SerializedName("activitycounter") @Param(description="the activity counter") + private Long activityCounter; + + @SerializedName("recoverycounter") @Param(description="the recovery counter") + private Long recoveryCounter; + + public void setHaState(final HAConfig.HAState haState) { + if (haState != null) { + this.haState = haState.toString().toLowerCase(); + } + } + + public void setPreviousHaState(final HAConfig.HAState previousHaState) { + if (previousHaState != null) { + this.previousHaState = previousHaState.toString().toLowerCase(); + } + } + + public void setHaEvent(final HAConfig.Event haEvent) { + this.haEvent = haEvent.toString().toLowerCase(); + } + + public void setActivityCounter(Long activityCounter) { + this.activityCounter = activityCounter; + } + + public void setRecoveryCounter(Long recoveryCounter) { + this.recoveryCounter = recoveryCounter; + } +} diff --git a/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java b/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java index aa40f1bb28d..8996d5af91c 100644 --- a/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java +++ b/plugins/hypervisors/simulator/src/com/cloud/ha/SimulatorInvestigator.java @@ -21,6 +21,7 @@ import java.util.List; import javax.inject.Inject; import org.apache.log4j.Logger; +import org.apache.cloudstack.ha.HAManager; import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; @@ -48,6 +49,8 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { ResourceManager _resourceMgr; @Inject MockConfigurationDao _mockConfigDao; + @Inject + private HAManager 
haManager; protected SimulatorInvestigator() { } @@ -58,6 +61,10 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { return null; } + if (haManager.isHAEligible(agent)) { + return haManager.getHostStatus(agent); + } + CheckOnHostCommand cmd = new CheckOnHostCommand(agent); List neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); for (HostVO neighbor : neighbors) { @@ -79,6 +86,9 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator { @Override public boolean isVmAlive(VirtualMachine vm, Host host) throws UnknownVM { + if (haManager.isHAEligible(host)) { + return haManager.isVMAliveOnHost(host); + } CheckVirtualMachineCommand cmd = new CheckVirtualMachineCommand(vm.getInstanceName()); try { Answer answer = _agentMgr.send(vm.getHostId(), cmd); diff --git a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java new file mode 100644 index 00000000000..3c3e92f6fff --- /dev/null +++ b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAProvider.java @@ -0,0 +1,152 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
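+//
+// SimulatorHAProvider is a host HA provider whose health, activity, recovery and fence
+// results are driven entirely by the per-host SimulatorHAState map populated via the
+// simulator APIs; it also listens on the HAConfig state machine and records each
+// transition so listHAStateTransitions() can report them.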
+ +package org.apache.cloudstack.ha; + +import com.cloud.api.response.SimulatorHAStateResponse; +import com.cloud.host.Host; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.utils.fsm.StateListener; +import com.cloud.utils.fsm.StateMachine2; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; +import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider; +import org.joda.time.DateTime; + +import javax.inject.Inject; +import java.security.InvalidParameterException; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class SimulatorHAProvider extends HAAbstractHostProvider implements HAProvider, StateListener { + + @Inject + private HAManager haManager; + + private final Map hostHAStateMap = new ConcurrentHashMap<>(); + + public SimulatorHAProvider() { + HAConfig.HAState.getStateMachine().registerListener(this); + } + + public void setHAStateForHost(final Long hostId, final SimulatorHAState state) { + hostHAStateMap.put(hostId, state); + haManager.purgeHACounter(hostId, HAResource.ResourceType.Host); + } + + public List listHAStateTransitions(final Long hostId) { + final SimulatorHAState haState = hostHAStateMap.get(hostId); + if (haState == null) { + return Collections.emptyList(); + } + return haState.listRecentStateTransitions(); + } + + @Override + public HAResource.ResourceType resourceType() { + return HAResource.ResourceType.Host; + } + + @Override + public HAResource.ResourceSubType resourceSubType() { + return HAResource.ResourceSubType.Simulator; + } + + @Override + public boolean isEligible(final Host host) { + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return !isInMaintenanceMode(host) && !isDisabled(host) && haState != null + && Hypervisor.HypervisorType.Simulator.equals(host.getHypervisorType()); + } + + @Override + public boolean isHealthy(final Host host) throws HACheckerException { + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return haState != null && haState.isHealthy(); + } + + @Override + public boolean hasActivity(final Host host, final DateTime afterThis) throws HACheckerException { + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return haState != null && haState.hasActivity(); + } + + @Override + public boolean recover(final Host host) throws HARecoveryException { + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return haState != null && haState.canRecover(); + } + + @Override + public boolean fence(final Host host) throws HAFenceException { + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return haState != null && haState.canFenced(); + } + + @Override + public Object getConfigValue(final HAProvider.HAProviderConfig name, final Host host) { + switch (name) { + case HealthCheckTimeout: + return 5L; + case ActivityCheckTimeout: + return 5L; + case RecoveryTimeout: + return 5L; + case FenceTimeout: + return 5L; + case MaxActivityCheckInterval: + return 1L; + case MaxActivityChecks: + return 3L; + case ActivityCheckFailureRatio: + final SimulatorHAState haState = hostHAStateMap.get(host.getId()); + return (haState != null && haState.hasActivity()) ? 
1.0 : 0.0; + case MaxDegradedWaitTimeout: + return 1L; + case MaxRecoveryAttempts: + return 2L; + case RecoveryWaitTimeout: + return 1L; + default: + throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString()); + } + } + + @Override + public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event, + final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) { + return false; + } + + @Override + public boolean postStateTransitionEvent(final StateMachine2.Transition transition, + final HAConfig vo, final boolean status, final Object opaque) { + if (vo.getResourceType() != HAResource.ResourceType.Host) { + return false; + } + final SimulatorHAState haState = hostHAStateMap.get(vo.getResourceId()); + if (haState == null || !status) { + return false; + } + final HAResourceCounter counter = haManager.getHACounter(vo.getResourceId(), vo.getResourceType()); + return haState.addStateTransition(transition.getToState(), transition.getCurrentState(), transition.getEvent(), counter); + } +} \ No newline at end of file diff --git a/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java new file mode 100644 index 00000000000..f47ed2f11a2 --- /dev/null +++ b/plugins/hypervisors/simulator/src/org/apache/cloudstack/ha/SimulatorHAState.java @@ -0,0 +1,89 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha; + +import com.cloud.api.response.SimulatorHAStateResponse; +import com.google.common.collect.EvictingQueue; + +import java.util.ArrayList; +import java.util.List; +import java.util.Queue; + +public final class SimulatorHAState { + private boolean healthy; + private boolean activity; + private boolean recover; + private boolean fence; + private Queue stateTransitions = EvictingQueue.create(100); + + public SimulatorHAState(boolean healthy, boolean activity, boolean recover, boolean fence) { + this.healthy = healthy; + this.activity = activity; + this.recover = recover; + this.fence = fence; + } + + public boolean isHealthy() { + return healthy; + } + + public void setHealthy(boolean healthy) { + this.healthy = healthy; + } + + public boolean hasActivity() { + return activity; + } + + public void setActivity(boolean activity) { + this.activity = activity; + } + + public boolean canRecover() { + return recover; + } + + public void setRecover(boolean recover) { + this.recover = recover; + } + + public boolean canFenced() { + return fence; + } + + public void setFence(boolean fence) { + this.fence = fence; + } + + public boolean addStateTransition(final HAConfig.HAState newHaState, final HAConfig.HAState oldHaState, final HAConfig.Event event, final HAResourceCounter counter) { + final SimulatorHAStateResponse stateResponse = new SimulatorHAStateResponse(); + stateResponse.setHaState(newHaState); + stateResponse.setPreviousHaState(oldHaState); + stateResponse.setHaEvent(event); + if (counter != null) { + stateResponse.setActivityCounter(counter.getActivityCheckCounter()); + stateResponse.setRecoveryCounter(counter.getRecoveryCounter()); + } + stateResponse.setObjectName("hastatetransition"); + return stateTransitions.add(stateResponse); + } + + public List listRecentStateTransitions() { + return new ArrayList<>(stateTransitions); + } +} diff --git a/scripts/vm/hypervisor/kvm/kvmvmactivity.sh b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh new file mode 100755 index 00000000000..2e0b535b901 --- /dev/null +++ b/scripts/vm/hypervisor/kvm/kvmvmactivity.sh @@ -0,0 +1,134 @@ +#!/bin/bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
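+#
+# Runs on the KVM host against the KVMHA directory of an NFS primary store: if the
+# hb-<host> heartbeat file was written within the last 60 seconds the host is reported
+# ALIVE; otherwise the newest mtime of the listed volume files is compared with the
+# suspect time (and with the previous sample kept in ac-<host>) to print ALIVE or DEAD.
+#
+# Illustrative invocation (placeholder values, flags as defined below):
+#   kvmvmactivity.sh -i <nfs-server-ip> -p <nfs-export-path> -m <mount-point> \
+#     -h <host-ip> -u <vol-uuid1>,<vol-uuid2> -t <caller-timestamp> -d <suspect-time>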
+ +help() { + printf "Usage: $0 + -i nfs server ip + -p nfs server path + -m mount point + -h host + -u volume uuid list + -t time on ms + -d suspect time\n" + exit 1 +} + +#set -x + +NfsSvrIP= +NfsSvrPath= +MountPoint= +HostIP= +UUIDList= +MSTime= +SuspectTime= + +while getopts 'i:p:m:u:t:h:d:' OPTION +do + case $OPTION in + i) + NfsSvrIP="$OPTARG" + ;; + p) + NfsSvrPath="$OPTARG" + ;; + m) + MountPoint="$OPTARG" + ;; + h) + HostIP="$OPTARG" + ;; + u) + UUIDList="$OPTARG" + ;; + t) + MSTime="$OPTARG" + ;; + d) + SuspectTime="$OPTARG" + ;; + *) + help + ;; + esac +done + +if [ -z "$NfsSvrIP" ] +then + exit 2 +fi + +if [ -z "$SuspectTime" ] +then + exit 2 +fi + +hbFile="$MountPoint/KVMHA/hb-$HostIP" +acFile="$MountPoint/KVMHA/ac-$HostIP" + +# First check: heartbeat file +now=$(date +%s) +hb=$(cat $hbFile) +diff=$(expr $now - $hb) +if [ $diff -lt 61 ] +then + echo "=====> ALIVE <=====" + exit 0 +fi + +if [ -z "$UUIDList" ] +then + echo "=====> DEAD <======" + exit 0 +fi + +# Second check: disk activity check +cd $MountPoint +latestUpdateTime=$(stat -c %Y $(echo $UUIDList | sed 's/,/ /g') | sort -nr | head -1) + +if [ ! -f $acFile ]; then + echo "$SuspectTime:$latestUpdateTime:$MSTime" > $acFile + + if [[ $latestUpdateTime -gt $SuspectTime ]]; then + echo "=====> ALIVE <=====" + else + echo "=====> DEAD <======" + fi +else + acTime=$(cat $acFile) + arrTime=(${acTime//:/ }) + lastSuspectTime=${arrTime[0]} + lastUpdateTime=${arrTime[1]} + echo "$SuspectTime:$latestUpdateTime:$MSTime" > $acFile + + if [[ $lastSuspectTime -ne $SuspectTime ]]; then + if [[ $latestUpdateTime -gt $SuspectTime ]]; then + echo "=====> ALIVE <=====" + else + echo "=====> DEAD <======" + fi + else + if [[ $latestUpdateTime -gt $lastUpdateTime ]]; then + echo "=====> ALIVE <=====" + else + echo "=====> DEAD <======" + fi + fi +fi + +exit 0 diff --git a/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 8a8d6452c23..ca9ed579dc1 100644 --- a/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -74,6 +74,11 @@ + + + + + diff --git a/server/src/com/cloud/alert/AlertManagerImpl.java b/server/src/com/cloud/alert/AlertManagerImpl.java index 0232843050d..a58a4f8b0f9 100644 --- a/server/src/com/cloud/alert/AlertManagerImpl.java +++ b/server/src/com/cloud/alert/AlertManagerImpl.java @@ -760,6 +760,7 @@ public class AlertManagerImpl extends ManagerBase implements AlertManager, Confi (alertType != AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED) && (alertType != AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED) && (alertType != AlertManager.AlertType.ALERT_TYPE_OOBM_AUTH_ERROR) && + (alertType != AlertManager.AlertType.ALERT_TYPE_HA_ACTION) && (alertType != AlertManager.AlertType.ALERT_TYPE_CA_CERT)) { alert = _alertDao.getLastAlert(alertType.getType(), dataCenterId, podId, clusterId); } diff --git a/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java b/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java index 3d8cc948944..8fc3e422d6a 100644 --- a/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java +++ b/server/src/com/cloud/api/query/dao/HostJoinDaoImpl.java @@ -35,6 +35,8 @@ import org.apache.cloudstack.api.response.HostForMigrationResponse; import org.apache.cloudstack.api.response.HostResponse; import org.apache.cloudstack.api.response.VgpuResponse; import 
org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.dao.HAConfigDao; import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao; import com.cloud.api.ApiDBUtils; @@ -59,6 +61,8 @@ public class HostJoinDaoImpl extends GenericDaoBase implements @Inject private HostDetailsDao hostDetailsDao; @Inject + private HAConfigDao haConfigDao; + @Inject private OutOfBandManagementDao outOfBandManagementDao; private final SearchBuilder hostSearch; @@ -231,6 +235,7 @@ public class HostJoinDaoImpl extends GenericDaoBase implements } } + hostResponse.setHostHAResponse(haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host)); hostResponse.setOutOfBandManagementResponse(outOfBandManagementDao.findByHost(host.getId())); hostResponse.setResourceState(host.getResourceState().toString()); diff --git a/server/src/com/cloud/api/query/vo/HostJoinVO.java b/server/src/com/cloud/api/query/vo/HostJoinVO.java index dcd058fce10..ea2e5185c76 100644 --- a/server/src/com/cloud/api/query/vo/HostJoinVO.java +++ b/server/src/com/cloud/api/query/vo/HostJoinVO.java @@ -36,6 +36,7 @@ import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.org.Cluster; import com.cloud.resource.ResourceState; import com.cloud.utils.db.GenericDao; +import org.apache.cloudstack.ha.HAConfig; import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement; /** @@ -99,6 +100,15 @@ public class HostJoinVO extends BaseViewVO implements InternalIdentity, Identity @Enumerated(value = EnumType.STRING) private OutOfBandManagement.PowerState outOfBandManagementPowerState; + @Column(name = "ha_enabled") + private boolean hostHAEnabled = false; + + @Column(name = "ha_state") + private HAConfig.HAState hostHAState; + + @Column(name = "ha_provider") + private String hostHAProvider; + @Column(name = "resource_state") @Enumerated(value = EnumType.STRING) private ResourceState resourceState; @@ -260,6 +270,18 @@ public class HostJoinVO extends BaseViewVO implements InternalIdentity, Identity return outOfBandManagementPowerState; } + public boolean isHostHAEnabled() { + return hostHAEnabled; + } + + public HAConfig.HAState getHostHAState() { + return hostHAState; + } + + public String getHostHAProvider() { + return hostHAProvider; + } + public ResourceState getResourceState() { return resourceState; } diff --git a/server/src/com/cloud/storage/StorageManagerImpl.java b/server/src/com/cloud/storage/StorageManagerImpl.java index bee2c3ab3d8..ac20f0f36ee 100644 --- a/server/src/com/cloud/storage/StorageManagerImpl.java +++ b/server/src/com/cloud/storage/StorageManagerImpl.java @@ -1032,7 +1032,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C Command[] cmdArray = cmds.toCommands(); for (Command cmd : cmdArray) { long targetHostId = _hvGuruMgr.getGuruProcessedCommandTargetHost(hostId, cmd); - answers.add(_agentMgr.send(targetHostId, cmd)); } return new Pair(hostId, answers.toArray(new Answer[answers.size()])); @@ -2347,8 +2346,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C " for template id " +templateOnImageStore.getTemplateId(), th); } } - - } // get bytesReadRate from service_offering, disk_offering and vm.disk.throttling.bytes_read_rate diff --git a/server/src/org/apache/cloudstack/ha/HAManager.java b/server/src/org/apache/cloudstack/ha/HAManager.java new file mode 100644 index 00000000000..3de7b49dd98 --- /dev/null +++ 
b/server/src/org/apache/cloudstack/ha/HAManager.java
@@ -0,0 +1,77 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.ha;
+
+import com.cloud.ha.Investigator;
+import com.cloud.host.Host;
+import com.cloud.host.Status;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.ha.provider.HAProvider;
+
+public interface HAManager extends HAConfigManager {
+
+    ConfigKey<Integer> MaxConcurrentHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.health.check.operations",
+            "50",
+            "The number of concurrent health check operations per management server. This setting determines the size of the thread pool consuming the HEALTH CHECK queue.", true);
+
+    ConfigKey<Integer> MaxPendingHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.health.check.operations",
+            "5000",
+            "The number of pending health check operations per management server. This setting determines the size of the HEALTH CHECK queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.activity.check.operations",
+            "25",
+            "The number of concurrent activity check operations per management server. This setting determines the size of the thread pool consuming the ACTIVITY CHECK queue.",
+            true);
+
+    ConfigKey<Integer> MaxPendingActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.activity.check.operations",
+            "2500",
+            "The number of pending activity check operations per management server. This setting determines the size of the ACTIVITY CHECK queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.recovery.operations",
+            "25",
+            "The number of concurrent recovery operations per management server.", true);
+
+    ConfigKey<Integer> MaxPendingRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.recovery.operations",
+            "2500",
+            "The number of pending recovery operations per management server. This setting determines the size of the RECOVERY queue.", true);
+
+    ConfigKey<Integer> MaxConcurrentFenceOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.concurrent.fence.operations",
+            "25",
+            "The number of concurrent fence operations per management server.", true);
+
+    ConfigKey<Integer> MaxPendingFenceOperations = new ConfigKey<>("Advanced", Integer.class,
+            "ha.max.pending.fence.operations",
+            "2500",
+            "The number of pending fence operations per management server.
This setting determines the size of the FENCE queue.", true); + + boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig); + HAProvider<HAResource> getHAProvider(final String name); + HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType); + void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType); + + boolean isHAEligible(final HAResource resource); + Boolean isVMAliveOnHost(final Host host) throws Investigator.UnknownVM; + Status getHostStatus(final Host host); +} \ No newline at end of file diff --git a/server/src/org/apache/cloudstack/ha/HAManagerImpl.java b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java new file mode 100644 index 00000000000..ad3438b9e60 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/HAManagerImpl.java @@ -0,0 +1,744 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha; + +import com.cloud.cluster.ClusterManagerListener; +import com.cloud.cluster.ManagementServerHost; +import com.cloud.dc.ClusterDetailsDao; +import com.cloud.dc.ClusterDetailsVO; +import com.cloud.dc.DataCenter; +import com.cloud.dc.DataCenterDetailVO; +import com.cloud.dc.dao.DataCenterDetailsDao; +import com.cloud.domain.Domain; +import com.cloud.event.ActionEvent; +import com.cloud.event.ActionEventUtils; +import com.cloud.event.EventTypes; +import com.cloud.ha.Investigator; +import com.cloud.host.Host; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.org.Cluster; +import com.cloud.utils.component.ComponentContext; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.component.PluggableService; +import com.cloud.utils.db.Transaction; +import com.cloud.utils.db.TransactionCallback; +import com.cloud.utils.db.TransactionStatus; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.fsm.NoTransitionException; +import com.google.common.base.Preconditions; +import com.google.common.base.Strings; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.command.admin.ha.ConfigureHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForClusterCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.DisableHAForZoneCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForClusterCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForHostCmd; +import org.apache.cloudstack.api.command.admin.ha.EnableHAForZoneCmd; +import org.apache.cloudstack.api.command.admin.ha.ListHostHAProvidersCmd; +import 
org.apache.cloudstack.api.command.admin.ha.ListHostHAResourcesCmd; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.cloudstack.framework.config.Configurable; +import org.apache.cloudstack.ha.dao.HAConfigDao; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig; +import org.apache.cloudstack.ha.task.ActivityCheckTask; +import org.apache.cloudstack.ha.task.FenceTask; +import org.apache.cloudstack.ha.task.HealthCheckTask; +import org.apache.cloudstack.ha.task.RecoveryTask; +import org.apache.cloudstack.kernel.Partition; +import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.cloudstack.poll.BackgroundPollManager; +import org.apache.cloudstack.poll.BackgroundPollTask; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ArrayBlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; + +public final class HAManagerImpl extends ManagerBase implements HAManager, ClusterManagerListener, PluggableService, Configurable { + public static final Logger LOG = Logger.getLogger(HAManagerImpl.class); + + @Inject + private HAConfigDao haConfigDao; + + @Inject + private HostDao hostDao; + + @Inject + private ClusterDetailsDao clusterDetailsDao; + + @Inject + private DataCenterDetailsDao dataCenterDetailsDao; + + @Inject + private BackgroundPollManager pollManager; + + private List> haProviders; + private Map> haProviderMap = new HashMap<>(); + + private static ExecutorService healthCheckExecutor; + private static ExecutorService activityCheckExecutor; + private static ExecutorService recoveryExecutor; + private static ExecutorService fenceExecutor; + + private static final String HA_ENABLED_DETAIL = "resourceHAEnabled"; + + ////////////////////////////////////////////////////// + //////////////// HA Manager methods ////////////////// + ////////////////////////////////////////////////////// + + public Map haCounterMap = new ConcurrentHashMap<>(); + + public HAProvider getHAProvider(final String name) { + return haProviderMap.get(name); + } + + private String resourceCounterKey(final Long resourceId, final HAResource.ResourceType resourceType) { + return resourceId.toString() + resourceType.toString(); + } + + public synchronized HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType) { + final String key = resourceCounterKey(resourceId, resourceType); + if (!haCounterMap.containsKey(key)) { + haCounterMap.put(key, new HAResourceCounter()); + } + return haCounterMap.get(key); + } + + public synchronized void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType) { + final String key = resourceCounterKey(resourceId, resourceType); + if (haCounterMap.containsKey(key)) { + haCounterMap.remove(key); + } + } + + public boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig) { + if (event == null || haConfig == null) { + return false; + } + final HAConfig.HAState currentHAState = haConfig.getState(); + try { + final 
HAConfig.HAState nextState = HAConfig.HAState.getStateMachine().getNextState(currentHAState, event); + boolean result = HAConfig.HAState.getStateMachine().transitTo(haConfig, event, null, haConfigDao); + if (result) { + final String message = String.format("Transitioned host HA state from:%s to:%s due to event:%s for the host id:%d", + currentHAState, nextState, event, haConfig.getResourceId()); + LOG.debug(message); + if (nextState == HAConfig.HAState.Recovering || nextState == HAConfig.HAState.Fencing || nextState == HAConfig.HAState.Fenced) { + ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), + Domain.ROOT_DOMAIN, EventTypes.EVENT_HA_STATE_TRANSITION, message); + } + } + return result; + } catch (NoTransitionException e) { + if (LOG.isTraceEnabled()) { + LOG.trace("Unable to find next HA state for current HA state: " + currentHAState + " for event: " + event + " for host" + haConfig.getResourceId()); + } + } + return false; + } + + private boolean transitionResourceStateToDisabled(final Partition partition) { + List resources; + if (partition.partitionType() == Partition.PartitionType.Cluster) { + resources = hostDao.findByClusterId(partition.getId()); + } else if (partition.partitionType() == Partition.PartitionType.Zone) { + resources = hostDao.findByDataCenterId(partition.getId()); + } else { + return true; + } + + boolean result = true; + for (final HAResource resource: resources) { + result = result && transitionHAState(HAConfig.Event.Disabled, + haConfigDao.findHAResource(resource.getId(), resource.resourceType())); + } + return result; + } + + private boolean checkHAOwnership(final HAConfig haConfig) { + // Skip for resources not owned by this mgmt server + return !(haConfig.getManagementServerId() != null + && haConfig.getManagementServerId() != ManagementServerNode.getManagementServerId()); + } + + private HAResource validateAndFindHAResource(final HAConfig haConfig) { + HAResource resource = null; + if (haConfig.getResourceType() == HAResource.ResourceType.Host) { + final Host host = hostDao.findById(haConfig.getResourceId()); + if (host != null && host.getRemoved() != null) { + return null; + } + resource = host; + if (resource == null && haConfig.getState() != HAConfig.HAState.Disabled) { + disableHA(haConfig.getResourceId(), haConfig.getResourceType()); + return null; + } + } + if (!haConfig.isEnabled() || !isHAEnabledForZone(resource) || !isHAEnabledForCluster(resource)) { + if (haConfig.getState() != HAConfig.HAState.Disabled) { + if (transitionHAState(HAConfig.Event.Disabled, haConfig) ) { + purgeHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + } + } + return null; + } else if (haConfig.getState() == HAConfig.HAState.Disabled) { + transitionHAState(HAConfig.Event.Enabled, haConfig); + } + return resource; + } + + private HAProvider validateAndFindHAProvider(final HAConfig haConfig, final HAResource resource) { + final HAProvider haProvider = haProviderMap.get(haConfig.getHaProvider()); + if (haProvider != null && !haProvider.isEligible(resource)) { + if (haConfig.getState() != HAConfig.HAState.Ineligible) { + transitionHAState(HAConfig.Event.Ineligible, haConfig); + } + return null; + } else if (haConfig.getState() == HAConfig.HAState.Ineligible) { + transitionHAState(HAConfig.Event.Eligible, haConfig); + } + return haProvider; + } + + public boolean isHAEnabledForZone(final HAResource resource) { + if (resource == null || resource.getDataCenterId() < 1L) { + return true; + } + final 
DataCenterDetailVO zoneDetails = dataCenterDetailsDao.findDetail(resource.getDataCenterId(), HA_ENABLED_DETAIL); + return zoneDetails == null || Strings.isNullOrEmpty(zoneDetails.getValue()) || Boolean.valueOf(zoneDetails.getValue()); + } + + private boolean isHAEnabledForCluster(final HAResource resource) { + if (resource == null || resource.getClusterId() == null) { + return true; + } + final ClusterDetailsVO clusterDetails = clusterDetailsDao.findDetail(resource.getClusterId(), HA_ENABLED_DETAIL); + return clusterDetails == null || Strings.isNullOrEmpty(clusterDetails.getValue()) || Boolean.valueOf(clusterDetails.getValue()); + } + + private boolean isHAEligibleForResource(final HAResource resource) { + if (resource == null || resource.getId() < 1L) { + return false; + } + HAResource.ResourceType resourceType = null; + if (resource instanceof Host) { + resourceType = HAResource.ResourceType.Host; + } + if (resourceType == null) { + return false; + } + final HAConfig haConfig = haConfigDao.findHAResource(resource.getId(), resourceType); + return haConfig != null && haConfig.isEnabled() + && haConfig.getState() != HAConfig.HAState.Disabled + && haConfig.getState() != HAConfig.HAState.Ineligible; + } + + public boolean isHAEligible(final HAResource resource) { + return resource != null && isHAEnabledForZone(resource) + && isHAEnabledForCluster(resource) + && isHAEligibleForResource(resource); + } + + public void validateHAProviderConfigForResource(final Long resourceId, final HAResource.ResourceType resourceType, final HAProvider haProvider) { + if (HAResource.ResourceType.Host.equals(resourceType)) { + final Host host = hostDao.findById(resourceId); + if (host.getHypervisorType() == null || haProvider.resourceSubType() == null || !host.getHypervisorType().toString().equals(haProvider.resourceSubType().toString())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Incompatible haprovider provided for the resource of hypervisor type:" + host.getHypervisorType()); + } + } + } + + //////////////////////////////////////////////////////////////////// + //////////////// HA Investigator wrapper for Old HA //////////////// + //////////////////////////////////////////////////////////////////// + + public Boolean isVMAliveOnHost(final Host host) throws Investigator.UnknownVM { + final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host); + if (haConfig != null) { + if (haConfig.getState() == HAConfig.HAState.Fenced) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Host is fenced " + host.getId()); + } + return false; + } + if (LOG.isDebugEnabled()){ + LOG.debug("HA: HOST is alive " + host.getId()); + } + return true; + } + throw new Investigator.UnknownVM(); + } + + public Status getHostStatus(final Host host) { + final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host); + if (haConfig != null) { + if (haConfig.getState() == HAConfig.HAState.Fenced) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Agent is available/suspect/checking Up " + host.getId()); + } + return Status.Down; + } else if (haConfig.getState() == HAConfig.HAState.Degraded || haConfig.getState() == HAConfig.HAState.Recovering || haConfig.getState() == HAConfig.HAState.Recovered || haConfig.getState() == HAConfig.HAState.Fencing) { + if (LOG.isDebugEnabled()){ + LOG.debug("HA: Agent is disconnected " + host.getId()); + } + return Status.Disconnected; + } + return Status.Up; + } + return Status.Unknown; + } + + 
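+ // Summary of the legacy-HA wrappers above: isVMAliveOnHost() reports VMs as not alive only when the
+ // host is Fenced, reports them alive for every other HA state, and throws UnknownVM when the host has
+ // no HA config; getHostStatus() maps Fenced to Down, Degraded/Recovering/Recovered/Fencing to
+ // Disconnected, any other HA state to Up, and a missing HA config to Unknown.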
////////////////////////////////////////////////////// + //////////////// HA API handlers ///////////////////// + ////////////////////////////////////////////////////// + + private boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final Boolean enable, final String haProvider) { + return Transaction.execute(new TransactionCallback() { + @Override + public Boolean doInTransaction(TransactionStatus status) { + HAConfigVO haConfig = (HAConfigVO) haConfigDao.findHAResource(resourceId, resourceType); + if (haConfig == null) { + haConfig = new HAConfigVO(); + if (haProvider != null) { + haConfig.setHaProvider(haProvider); + } + if (enable != null) { + haConfig.setEnabled(enable); + haConfig.setManagementServerId(ManagementServerNode.getManagementServerId()); + } + haConfig.setResourceId(resourceId); + haConfig.setResourceType(resourceType); + if (Strings.isNullOrEmpty(haConfig.getHaProvider())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "HAProvider is not provided for the resource, failing configuration."); + } + if (haConfigDao.persist(haConfig) != null) { + return true; + } + } else { + if (enable != null) { + haConfig.setEnabled(enable); + } + if (haProvider != null) { + haConfig.setHaProvider(haProvider); + } + if (Strings.isNullOrEmpty(haConfig.getHaProvider())) { + throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "HAProvider is not provided for the resource, failing configuration."); + } + return haConfigDao.update(haConfig.getId(), haConfig); + } + return false; + } + }); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_CONFIGURE, eventDescription = "configuring HA for resource") + public boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final String haProvider) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + Preconditions.checkArgument(!Strings.isNullOrEmpty(haProvider)); + + if (!haProviderMap.containsKey(haProvider.toLowerCase())) { + throw new CloudRuntimeException("Given HA provider does not exist."); + } + validateHAProviderConfigForResource(resourceId, resourceType, haProviderMap.get(haProvider.toLowerCase())); + return configureHA(resourceId, resourceType, null, haProvider.toLowerCase()); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for resource") + public boolean enableHA(final Long resourceId, final HAResource.ResourceType resourceType) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + return configureHA(resourceId, resourceType, true, null); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for resource") + public boolean disableHA(final Long resourceId, final HAResource.ResourceType resourceType) { + Preconditions.checkArgument(resourceId != null && resourceId > 0L); + Preconditions.checkArgument(resourceType != null); + boolean result = configureHA(resourceId, resourceType, false, null); + if (result) { + transitionHAState(HAConfig.Event.Disabled, haConfigDao.findHAResource(resourceId, resourceType)); + purgeHACounter(resourceId, resourceType); + } + return result; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a cluster") + public boolean enableHA(final Cluster cluster) { + 
clusterDetailsDao.persist(cluster.getId(), HA_ENABLED_DETAIL, String.valueOf(true)); + return true; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a cluster") + public boolean disableHA(final Cluster cluster) { + clusterDetailsDao.persist(cluster.getId(), HA_ENABLED_DETAIL, String.valueOf(false)); + return transitionResourceStateToDisabled(cluster); + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a zone") + public boolean enableHA(final DataCenter zone) { + dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, String.valueOf(true)); + return true; + } + + @Override + @ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a zone") + public boolean disableHA(final DataCenter zone) { + dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, String.valueOf(false)); + return transitionResourceStateToDisabled(zone); + } + + @Override + public List listHAResources(final Long resourceId, final HAResource.ResourceType resourceType) { + return haConfigDao.listHAResource(resourceId, resourceType); + } + + @Override + public List listHAProviders(final HAResource.ResourceType resourceType, final HAResource.ResourceSubType entityType) { + final List haProviderNames = new ArrayList<>(); + for (final HAProvider haProvider : haProviders) { + if (haProvider.resourceType().equals(resourceType) && haProvider.resourceSubType().equals(entityType)) { + haProviderNames.add(haProvider.getClass().getSimpleName()); + } + } + return haProviderNames; + } + + @Override + public List> getCommands() { + List> cmdList = new ArrayList<>(); + cmdList.add(ConfigureHAForHostCmd.class); + cmdList.add(EnableHAForHostCmd.class); + cmdList.add(EnableHAForClusterCmd.class); + cmdList.add(EnableHAForZoneCmd.class); + cmdList.add(DisableHAForHostCmd.class); + cmdList.add(DisableHAForClusterCmd.class); + cmdList.add(DisableHAForZoneCmd.class); + cmdList.add(ListHostHAResourcesCmd.class); + cmdList.add(ListHostHAProvidersCmd.class); + return cmdList; + } + + ////////////////////////////////////////////////////////////////// + //////////////// Clustered Manager Listeners ///////////////////// + ////////////////////////////////////////////////////////////////// + + @Override + public void onManagementNodeJoined(List nodeList, long selfNodeId) { + + } + + @Override + public void onManagementNodeLeft(List nodeList, long selfNodeId) { + + } + + @Override + public void onManagementNodeIsolated() { + + } + + /////////////////////////////////////////////////// + //////////////// Manager Init ///////////////////// + /////////////////////////////////////////////////// + + @Override + public boolean start() { + haProviderMap.clear(); + for (final HAProvider haProvider : haProviders) { + haProviderMap.put(haProvider.getClass().getSimpleName().toLowerCase(), haProvider); + } + return true; + } + + @Override + public boolean stop() { + haConfigDao.expireServerOwnership(ManagementServerNode.getManagementServerId()); + return true; + } + + @Override + public boolean configure(final String name, final Map params) throws ConfigurationException { + // Health Check + final int healthCheckWorkers = MaxConcurrentHealthCheckOperations.value(); + final int healthCheckQueueSize = MaxPendingHealthCheckOperations.value(); + healthCheckExecutor = new ThreadPoolExecutor(healthCheckWorkers, healthCheckWorkers, + 0L, TimeUnit.MILLISECONDS, + new 
ArrayBlockingQueue(healthCheckQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + // Activity Check + final int activityCheckWorkers = MaxConcurrentActivityCheckOperations.value(); + final int activityCheckQueueSize = MaxPendingActivityCheckOperations.value(); + activityCheckExecutor = new ThreadPoolExecutor(activityCheckWorkers, activityCheckWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(activityCheckQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + // Recovery + final int recoveryOperationWorkers = MaxConcurrentRecoveryOperations.value(); + final int recoveryOperationQueueSize = MaxPendingRecoveryOperations.value(); + recoveryExecutor = new ThreadPoolExecutor(recoveryOperationWorkers, recoveryOperationWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(recoveryOperationQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + // Fence + final int fenceOperationWorkers = MaxConcurrentFenceOperations.value(); + final int fenceOperationQueueSize = MaxPendingFenceOperations.value(); + fenceExecutor = new ThreadPoolExecutor(fenceOperationWorkers, fenceOperationWorkers, + 0L, TimeUnit.MILLISECONDS, + new ArrayBlockingQueue(fenceOperationQueueSize, true), new ThreadPoolExecutor.CallerRunsPolicy()); + + pollManager.submitTask(new HealthCheckPollTask()); + pollManager.submitTask(new ActivityCheckPollTask()); + pollManager.submitTask(new RecoveryPollTask()); + pollManager.submitTask(new FencingPollTask()); + + LOG.debug("HA manager has been configured"); + return true; + } + + public void setHaProviders(List> haProviders) { + this.haProviders = haProviders; + } + + @Override + public String getConfigComponentName() { + return HAManager.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[] { + MaxConcurrentHealthCheckOperations, + MaxPendingHealthCheckOperations, + MaxConcurrentActivityCheckOperations, + MaxPendingActivityCheckOperations, + MaxConcurrentRecoveryOperations, + MaxPendingRecoveryOperations, + MaxConcurrentFenceOperations, + MaxPendingFenceOperations + }; + } + + ///////////////////////////////////////////////// + //////////////// Poll Tasks ///////////////////// + ///////////////////////////////////////////////// + + private final class HealthCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA health check task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + + if (haConfig.getState() == HAConfig.HAState.Suspect) { + if (counter.canPerformActivityCheck((Long)(haProvider.getConfigValue(HAProviderConfig.MaxActivityCheckInterval, resource)))) { + transitionHAState(HAConfig.Event.PerformActivityCheck, haConfig); + } + } + + if (haConfig.getState() == HAConfig.HAState.Degraded) { + if (counter.canRecheckActivity((Long)(haProvider.getConfigValue(HAProviderConfig.MaxDegradedWaitTimeout, resource)))) { + transitionHAState(HAConfig.Event.PeriodicRecheckResourceActivity, haConfig); + 
} + } + + switch (haConfig.getState()) { + case Available: + case Suspect: + case Degraded: + case Fenced: + final HealthCheckTask task = ComponentContext.inject(new HealthCheckTask(resource, haProvider, haConfig, + HAProviderConfig.HealthCheckTimeout, healthCheckExecutor)); + healthCheckExecutor.submit(task); + break; + default: + break; + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform health checks in HA manager", t); + } + } + } + + private final class ActivityCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA activity check task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + if (haConfig.getState() == HAConfig.HAState.Checking) { + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig, + HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, counter.getSuspectTimeStamp())); + activityCheckExecutor.submit(job); + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform activity checks in HA manager", t); + } + } + } + + private final class RecoveryPollTask extends ManagedContextRunnable implements BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA recovery task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (haConfig.getState() == HAConfig.HAState.Recovering) { + if (counter.canAttemptRecovery()) { + if (counter.getRecoveryCounter() >= (Long)(haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) { + transitionHAState(HAConfig.Event.RecoveryOperationThresholdExceeded, haConfig); + continue; + } + + final RecoveryTask task = ComponentContext.inject(new RecoveryTask(resource, haProvider, haConfig, + HAProviderConfig.RecoveryTimeout, recoveryExecutor)); + final Future recoveryFuture = recoveryExecutor.submit(task); + counter.setRecoveryFuture(recoveryFuture); + counter.incrRecoveryCounter(); + } + } + if (haConfig.getState() == HAConfig.HAState.Recovered) { + counter.markRecoveryStarted(); + if (counter.canExitRecovery((Long)(haProvider.getConfigValue(HAProviderConfig.RecoveryWaitTimeout, resource)))) { + transitionHAState(HAConfig.Event.RecoveryWaitPeriodTimeout, haConfig); + counter.markRecoveryCompleted(); + } + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform recovery operation in HA manager", t); + } + } + } + + private final class FencingPollTask extends ManagedContextRunnable implements 
BackgroundPollTask { + @Override + protected void runInContext() { + try { + if (LOG.isTraceEnabled()) { + LOG.trace("HA fencing task is running..."); + } + final List haConfigList = new ArrayList(haConfigDao.listAll()); + for (final HAConfig haConfig : haConfigList) { + if (!checkHAOwnership(haConfig)) { + continue; + } + + final HAResource resource = validateAndFindHAResource(haConfig); + if (resource == null) { + continue; + } + + final HAProvider haProvider = validateAndFindHAProvider(haConfig, resource); + if (haProvider == null) { + continue; + } + + final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (counter.lastFencingCompleted()) { + if (haConfig.getState() == HAConfig.HAState.Fencing) { + final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig, + HAProviderConfig.FenceTimeout, fenceExecutor)); + final Future fenceFuture = fenceExecutor.submit(task); + counter.setFenceFuture(fenceFuture); + } + } + } + } catch (Throwable t) { + LOG.error("Error trying to perform fencing operation in HA manager", t); + } + } + } +} diff --git a/server/src/org/apache/cloudstack/ha/HAResourceCounter.java b/server/src/org/apache/cloudstack/ha/HAResourceCounter.java new file mode 100644 index 00000000000..f955fd2f8fd --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/HAResourceCounter.java @@ -0,0 +1,128 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
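+// HAResourceCounter keeps the in-memory, per-resource HA bookkeeping: activity-check and recovery
+// counters, the timestamps that pace activity rechecks and recovery wait periods, and the futures of
+// in-flight recovery/fence tasks. Instances are created and cached by HAManagerImpl, keyed by resource
+// id and type, and purged when HA is disabled for the resource; none of this state is persisted.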
+ +package org.apache.cloudstack.ha; + +import java.util.concurrent.Future; +import java.util.concurrent.atomic.AtomicLong; + +public final class HAResourceCounter { + private AtomicLong activityCheckCounter = new AtomicLong(0); + private AtomicLong activityCheckFailureCounter = new AtomicLong(0); + private AtomicLong recoveryOperationCounter = new AtomicLong(0); + + private Long firstHealthCheckFailureTimestamp; + private Long lastActivityCheckTimestamp; + private Long degradedTimestamp; + private Long recoverTimestamp; + private Future recoveryFuture; + private Future fenceFuture; + + public long getActivityCheckCounter() { + return activityCheckCounter.get(); + } + + public long getRecoveryCounter() { + return recoveryOperationCounter.get(); + } + + public synchronized void incrActivityCounter(final boolean isFailure) { + lastActivityCheckTimestamp = System.currentTimeMillis(); + activityCheckCounter.incrementAndGet(); + if (isFailure) { + activityCheckFailureCounter.incrementAndGet(); + } + } + + public synchronized void incrRecoveryCounter() { + recoveryOperationCounter.incrementAndGet(); + } + + public synchronized void resetActivityCounter() { + activityCheckCounter.set(0); + activityCheckFailureCounter.set(0); + } + + public synchronized void resetRecoveryCounter() { + recoverTimestamp = null; + recoveryFuture = null; + recoveryOperationCounter.set(0); + } + + public synchronized void resetSuspectTimestamp() { + firstHealthCheckFailureTimestamp = null; + } + + public boolean hasActivityThresholdExceeded(final double failureRatio) { + return activityCheckFailureCounter.get() > (activityCheckCounter.get() * failureRatio); + } + + public boolean canPerformActivityCheck(final Long activityCheckInterval) { + return lastActivityCheckTimestamp == null || (System.currentTimeMillis() - lastActivityCheckTimestamp) > (activityCheckInterval * 1000); + } + + public boolean canRecheckActivity(final Long maxDegradedPeriod) { + return degradedTimestamp == null || (System.currentTimeMillis() - degradedTimestamp) > (maxDegradedPeriod * 1000); + } + + public boolean canExitRecovery(final Long maxRecoveryWaitPeriod) { + return recoverTimestamp != null && (System.currentTimeMillis() - recoverTimestamp) > (maxRecoveryWaitPeriod * 1000); + } + + public long getSuspectTimeStamp() { + if (firstHealthCheckFailureTimestamp == null) { + firstHealthCheckFailureTimestamp = System.currentTimeMillis(); + } + return firstHealthCheckFailureTimestamp; + } + + public synchronized void markResourceSuspected() { + firstHealthCheckFailureTimestamp = System.currentTimeMillis(); + } + + public synchronized void markResourceDegraded() { + degradedTimestamp = System.currentTimeMillis(); + } + + public synchronized void markRecoveryStarted() { + if (recoverTimestamp == null) { + recoverTimestamp = System.currentTimeMillis(); + } + } + + public synchronized void markRecoveryCompleted() { + recoverTimestamp = null; + recoveryFuture = null; + } + + public void setRecoveryFuture(final Future future) { + recoveryFuture = future; + } + + public boolean canAttemptRecovery() { + return recoveryFuture == null || recoveryFuture.isDone(); + } + + public void setFenceFuture(final Future future) { + fenceFuture = future; + } + + public boolean lastFencingCompleted() { + return fenceFuture == null || fenceFuture.isDone(); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/ActivityCheckerInterface.java b/server/src/org/apache/cloudstack/ha/provider/ActivityCheckerInterface.java new file mode 100644 index 
00000000000..1f280297436 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/ActivityCheckerInterface.java @@ -0,0 +1,35 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider; + +import org.joda.time.DateTime; + +import org.apache.cloudstack.ha.HAResource; + +import com.cloud.utils.component.Adapter; + +/** + * Checking activity requires deeper investigation. This will be invoked when a health check has failed. + * + * @param + */ +public interface ActivityCheckerInterface extends Adapter { + + boolean isActive(R r, DateTime t) throws HACheckerException ; + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java b/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java new file mode 100644 index 00000000000..9e736221d63 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HACheckerException.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.cloudstack.ha.provider; + +public class HACheckerException extends Exception { + + private static final long serialVersionUID = 1L; + + public HACheckerException(String string, Exception e) { + super(string, e); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java b/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java new file mode 100644 index 00000000000..80a7c3be92a --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HAFenceException.java @@ -0,0 +1,29 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + + +package org.apache.cloudstack.ha.provider; + +public class HAFenceException extends Exception { + + private static final long serialVersionUID = 1L; + + public HAFenceException(String string, Exception e) { + super(string, e); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HAProvider.java b/server/src/org/apache/cloudstack/ha/provider/HAProvider.java new file mode 100644 index 00000000000..bcc590c965f --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HAProvider.java @@ -0,0 +1,65 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider; + +import com.cloud.utils.component.Adapter; + +import org.apache.cloudstack.ha.HAConfig; +import org.joda.time.DateTime; + +import org.apache.cloudstack.ha.HAResource; + +public interface HAProvider extends Adapter { + + enum HAProviderConfig { + HealthCheckTimeout, + ActivityCheckTimeout, + RecoveryTimeout, + FenceTimeout, + ActivityCheckFailureRatio, + MaxActivityChecks, + MaxRecoveryAttempts, + MaxActivityCheckInterval, + MaxDegradedWaitTimeout, + RecoveryWaitTimeout + }; + + HAResource.ResourceType resourceType(); + + HAResource.ResourceSubType resourceSubType(); + + boolean isDisabled(R r); + + boolean isInMaintenanceMode(R r); + + boolean isEligible(R r); + + boolean isHealthy(R r) throws HACheckerException; + + boolean hasActivity(R r, DateTime afterThis) throws HACheckerException; + + boolean recover(R r) throws HARecoveryException; + + boolean fence(R r) throws HAFenceException; + + void setFenced(R r); + + void sendAlert(R r, HAConfig.HAState nextState); + + Object getConfigValue(HAProviderConfig name, R r); +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java b/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java new file mode 100644 index 00000000000..893e21c7230 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HARecoveryException.java @@ -0,0 +1,28 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider; + +public class HARecoveryException extends Exception { + + private static final long serialVersionUID = 1L; + + public HARecoveryException(String string, Exception e) { + super(string, e); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HealthCheckerInterface.java b/server/src/org/apache/cloudstack/ha/provider/HealthCheckerInterface.java new file mode 100644 index 00000000000..ec0a5810214 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HealthCheckerInterface.java @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider; + +import org.apache.cloudstack.ha.HAResource; + +import com.cloud.utils.component.Adapter; + +/** + * Health checker is a quick way to find out if a resource is active. Like pinging the host or checking agent health. + * + * @param + */ + +public interface HealthCheckerInterface extends Adapter { + + boolean isHealthy(R r); + +} diff --git a/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java b/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java new file mode 100644 index 00000000000..4867d92f35d --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/HostHAProvider.java @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.ha.provider; + +import com.cloud.host.Host; + +public interface HostHAProvider extends HAProvider { +} diff --git a/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java b/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java new file mode 100644 index 00000000000..43aa20015fa --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/provider/host/HAAbstractHostProvider.java @@ -0,0 +1,105 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.provider.host; + +import com.cloud.agent.AgentManager; +import com.cloud.alert.AlertManager; +import com.cloud.ha.HighAvailabilityManager; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.Status.Event; +import com.cloud.resource.ResourceManager; +import com.cloud.resource.ResourceState; +import com.cloud.utils.component.AdapterBase; +import com.cloud.utils.fsm.NoTransitionException; +import org.apache.cloudstack.alert.AlertService; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.utils.identity.ManagementServerNode; +import org.apache.log4j.Logger; + +import javax.inject.Inject; + +public abstract class HAAbstractHostProvider extends AdapterBase implements HAProvider { + + private final static Logger LOG = Logger.getLogger(HAAbstractHostProvider.class); + + @Inject + private AlertManager alertManager; + @Inject + protected AgentManager agentManager; + @Inject + protected ResourceManager resourceManager; + @Inject + protected HighAvailabilityManager oldHighAvailabilityManager; + + + @Override + public HAResource.ResourceType resourceType() { + return HAResource.ResourceType.Host; + } + + public HAResource.ResourceSubType resourceSubType() { + return HAResource.ResourceSubType.Unknown; + } + + @Override + public boolean isDisabled(final Host host) { + return host.isDisabled(); + } + + @Override + public boolean isInMaintenanceMode(final Host host) { + return host.isInMaintenanceStates(); + } + + @Override + public void setFenced(final Host r) { + if (r.getState() != Status.Down) { + try { + LOG.debug("Trying to disconnect the host without investigation and scheduling HA for the VMs on host id=" + r.getId()); + agentManager.disconnectWithoutInvestigation(r.getId(), Event.HostDown); + oldHighAvailabilityManager.scheduleRestartForVmsOnHost((HostVO)r, true); + } catch (Exception e) { + LOG.error("Failed to disconnect host and schedule HA restart of VMs after fencing the host: ", e); + } + try { + resourceManager.resourceStateTransitTo(r, ResourceState.Event.InternalEnterMaintenance, 
ManagementServerNode.getManagementServerId()); + } catch (NoTransitionException e) { + LOG.error("Failed to put host in maintenance mode after host-ha fencing and scheduling VM-HA: ", e); + } + } + } + + @Override + public void sendAlert(final Host host, final HAConfig.HAState nextState) { + String subject = "HA operation performed for host"; + String body = subject; + if (HAConfig.HAState.Fencing.equals(nextState)) { + subject = String.format("HA Fencing of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId()); + body = String.format("HA Fencing has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId()); + } else if (HAConfig.HAState.Recovering.equals(nextState)) { + subject = String.format("HA Recovery of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId()); + body = String.format("HA Recovery has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId()); + } + alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HA_ACTION, host.getDataCenterId(), host.getPodId(), subject, body); + } + +} diff --git a/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java b/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java new file mode 100644 index 00000000000..ab8af6124a7 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/ActivityCheckTask.java @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
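+// ActivityCheckTask asks the HA provider whether the resource has shown activity since the suspect
+// timestamp (the first health-check failure). A checker exception moves the config to Ineligible and
+// resets the activity counters; otherwise the sample is recorded and, until MaxActivityChecks samples
+// exist, the task raises TooFewActivityCheckSamples. Once enough samples exist the failure ratio is
+// compared against ActivityCheckFailureRatio to raise ActivityCheckFailureOverThresholdRatio or
+// ActivityCheckFailureUnderThresholdRatio (the latter also marks the resource Degraded), after which
+// the activity counters are reset.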
+ +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig; +import org.apache.log4j.Logger; + +import javax.inject.Inject; + +import org.joda.time.DateTime; +import java.util.concurrent.ExecutorService; + +public class ActivityCheckTask extends BaseHATask { + + public static final Logger LOG = Logger.getLogger(ActivityCheckTask.class); + + @Inject + private HAManager haManager; + + private final long disconnectTime; + + public ActivityCheckTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig, + final ExecutorService executor, final long disconnectTime) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + this.disconnectTime = disconnectTime; + } + + public boolean performAction() throws HACheckerException { + return getHaProvider().hasActivity(getResource(), new DateTime(disconnectTime)); + } + + public void processResult(boolean result, Throwable t) { + final HAConfig haConfig = getHaConfig(); + final HAProvider haProvider = getHaProvider(); + final HAResource resource = getResource(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + + if (t != null && t instanceof HACheckerException) { + haManager.transitionHAState(HAConfig.Event.Ineligible, getHaConfig()); + counter.resetActivityCounter(); + return; + } + + counter.incrActivityCounter(!result); + + long maxActivityChecks = (Long)haProvider.getConfigValue(HAProviderConfig.MaxActivityChecks, resource); + if (counter.getActivityCheckCounter() < maxActivityChecks) { + haManager.transitionHAState(HAConfig.Event.TooFewActivityCheckSamples, haConfig); + return; + } + + double activityCheckFailureRatio = (Double)haProvider.getConfigValue(HAProviderConfig.ActivityCheckFailureRatio, resource); + if (counter.hasActivityThresholdExceeded(activityCheckFailureRatio)) { + haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureOverThresholdRatio, haConfig); + } else { + haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureUnderThresholdRatio, haConfig); + counter.markResourceDegraded(); + } + counter.resetActivityCounter(); + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/BaseHATask.java b/server/src/org/apache/cloudstack/ha/task/BaseHATask.java new file mode 100644 index 00000000000..3ed87388026 --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/BaseHATask.java @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; +import org.apache.log4j.Logger; + +import java.util.concurrent.Callable; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; + +public abstract class BaseHATask implements Callable { + public static final Logger LOG = Logger.getLogger(BaseHATask.class); + + private final HAResource resource; + private final HAProvider haProvider; + private final HAConfig haConfig; + private final ExecutorService executor; + private Long timeout; + + public BaseHATask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig, + final ExecutorService executor) { + this.resource = resource; + this.haProvider = haProvider; + this.haConfig = haConfig; + this.executor = executor; + this.timeout = (Long)haProvider.getConfigValue(haProviderConfig, resource); + } + + public HAProvider getHaProvider() { + return haProvider; + } + + public HAConfig getHaConfig() { + return haConfig; + } + + public HAResource getResource() { + return resource; + } + + public String getTaskType() { + return this.getClass().getSimpleName(); + } + + public boolean performAction() throws HACheckerException, HAFenceException, HARecoveryException { + return true; + } + + public abstract void processResult(boolean result, Throwable e); + + @Override + public Boolean call() { + final Future future = executor.submit(new Callable() { + @Override + public Boolean call() throws HACheckerException, HAFenceException, HARecoveryException { + return performAction(); + } + }); + + boolean result = false; + Throwable throwable = null; + try { + if (timeout == null) { + result = future.get(); + } else { + result = future.get(timeout, TimeUnit.SECONDS); + } + } catch (InterruptedException | ExecutionException e) { + LOG.warn("Exception occurred while running " + getTaskType() + " on a resource: " + e.getMessage(), e.getCause()); + throwable = e.getCause(); + } catch (TimeoutException e) { + LOG.trace(getTaskType() + " operation timed out for resource id:" + resource.getId()); + } + processResult(result, throwable); + return result; + } + +} diff --git a/server/src/org/apache/cloudstack/ha/task/FenceTask.java b/server/src/org/apache/cloudstack/ha/task/FenceTask.java new file mode 100644 index 00000000000..d9fd62c164c --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/FenceTask.java @@ -0,0 +1,55 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAFenceException; +import org.apache.cloudstack.ha.provider.HAProvider; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class FenceTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public FenceTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException, HAFenceException { + return getHaProvider().fence(getResource()); + } + + public void processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (result) { + counter.resetRecoveryCounter(); + haManager.transitionHAState(HAConfig.Event.Fenced, haConfig); + getHaProvider().setFenced(getResource()); + } + getHaProvider().sendAlert(getResource(), HAConfig.HAState.Fencing); + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java b/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java new file mode 100644 index 00000000000..92dcdc2164d --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/HealthCheckTask.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
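BaseHATask above runs each provider call on a secondary executor submission so that it can be bounded by the HAProviderConfig timeout value resolved in its constructor: a timed-out check is handed to processResult() as a failed result, while an ExecutionException surfaces its cause as the error. Purely for illustration, here is a minimal, self-contained sketch of that pattern in Python; the names and values are invented, and the Java class above remains the actual implementation.

from concurrent.futures import ThreadPoolExecutor, TimeoutError
import time

def perform_action():
    # Stand-in for a provider call such as isHealthy() or hasActivity().
    time.sleep(0.5)
    return True

def run_bounded_check(timeout_seconds):
    executor = ThreadPoolExecutor(max_workers=1)
    future = executor.submit(perform_action)
    result, error = False, None
    try:
        # A missing timeout waits indefinitely, mirroring the "timeout == null" branch.
        result = future.result(timeout=timeout_seconds)
    except TimeoutError:
        # A timed-out check counts as a failed result, not as an error.
        pass
    except Exception as e:
        # Analogous to handing the ExecutionException cause to processResult().
        error = e
    executor.shutdown(wait=False)
    return result, error

if __name__ == '__main__':
    print(run_bounded_check(2))    # finishes inside the bound -> (True, None)
    print(run_bounded_check(0.1))  # exceeds the bound -> (False, None)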
+ +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.HAResourceCounter; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class HealthCheckTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public static final Logger LOG = Logger.getLogger(HealthCheckTask.class); + + public HealthCheckTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException { + return getHaProvider().isHealthy(getResource()); + } + + public void processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + final HAResourceCounter counter = haManager.getHACounter(haConfig.getResourceId(), haConfig.getResourceType()); + if (result) { + haManager.transitionHAState(HAConfig.Event.HealthCheckPassed, haConfig); + if (haConfig.getState() == HAConfig.HAState.Fenced) { + haManager.disableHA(haConfig.getResourceId(), haConfig.getResourceType()); + } + counter.resetSuspectTimestamp(); + counter.resetActivityCounter(); + counter.resetRecoveryCounter(); + } else { + haManager.transitionHAState(HAConfig.Event.HealthCheckFailed, haConfig); + counter.markResourceSuspected(); + } + } +} diff --git a/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java b/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java new file mode 100644 index 00000000000..b4eb863fbfc --- /dev/null +++ b/server/src/org/apache/cloudstack/ha/task/RecoveryTask.java @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
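ActivityCheckTask above defers any decision until a full window of samples has been collected (the TooFewActivityCheckSamples transition) and only then compares the share of failed activity checks against the provider's failure-ratio setting: over the ratio the host is treated as down, under it the resource is merely marked degraded and the counter resets. A small self-contained sketch of that decision follows; the window size and threshold are invented stand-ins for HAProviderConfig.MaxActivityChecks and HAProviderConfig.ActivityCheckFailureRatio, and whether the comparison is strict or inclusive is up to HAResourceCounter.

MAX_ACTIVITY_CHECKS = 10        # stand-in for HAProviderConfig.MaxActivityChecks
FAILURE_RATIO_THRESHOLD = 0.7   # stand-in for HAProviderConfig.ActivityCheckFailureRatio

def classify(samples):
    """samples: list of booleans, True meaning the activity check saw disk activity."""
    if len(samples) < MAX_ACTIVITY_CHECKS:
        return 'TooFewActivityCheckSamples'               # keep sampling, no decision yet
    failures = sum(1 for active in samples if not active)
    if failures / float(len(samples)) > FAILURE_RATIO_THRESHOLD:
        return 'ActivityCheckFailureOverThresholdRatio'   # looks dead: proceed towards recovery/fencing
    return 'ActivityCheckFailureUnderThresholdRatio'      # some activity seen: mark the resource degraded

if __name__ == '__main__':
    print(classify([False] * 4))               # window not yet full
    print(classify([False] * 8 + [True] * 2))  # 80% failed, over a 0.7 threshold
    print(classify([False] * 5 + [True] * 5))  # 50% failed, under the threshold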
+ +package org.apache.cloudstack.ha.task; + +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.ha.HAManager; +import org.apache.cloudstack.ha.HAResource; +import org.apache.cloudstack.ha.provider.HACheckerException; +import org.apache.cloudstack.ha.provider.HAProvider; +import org.apache.cloudstack.ha.provider.HARecoveryException; + +import javax.inject.Inject; +import java.util.concurrent.ExecutorService; + +public class RecoveryTask extends BaseHATask { + + @Inject + private HAManager haManager; + + public RecoveryTask(final HAResource resource, final HAProvider haProvider, final HAConfig haConfig, + final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) { + super(resource, haProvider, haConfig, haProviderConfig, executor); + } + + public boolean performAction() throws HACheckerException, HARecoveryException { + return getHaProvider().recover(getResource()); + } + + public void processResult(boolean result, Throwable e) { + final HAConfig haConfig = getHaConfig(); + if (result) { + haManager.transitionHAState(HAConfig.Event.Recovered, haConfig); + } + getHaProvider().sendAlert(getResource(), HAConfig.HAState.Recovering); + } +} diff --git a/setup/db/db/schema-41000to41100.sql b/setup/db/db/schema-41000to41100.sql index eacddc15c4d..c67e710ff76 100644 --- a/setup/db/db/schema-41000to41100.sql +++ b/setup/db/db/schema-41000to41100.sql @@ -138,3 +138,106 @@ CREATE TABLE IF NOT EXISTS `cloud`.`crl` ( KEY (`serial`), UNIQUE KEY (`serial`, `cn`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +-- Host HA feature +CREATE TABLE IF NOT EXISTS `cloud`.`ha_config` ( + `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT, + `resource_id` bigint(20) unsigned DEFAULT NULL COMMENT 'id of the resource', + `resource_type` varchar(255) NOT NULL COMMENT 'the type of the resource', + `enabled` int(1) unsigned DEFAULT '0' COMMENT 'is HA enabled for the resource', + `ha_state` varchar(255) DEFAULT 'Disabled' COMMENT 'HA state', + `provider` varchar(255) DEFAULT NULL COMMENT 'HA provider', + `update_count` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'state based incr-only counter for atomic ha_state updates', + `update_time` datetime COMMENT 'last ha_state update datetime', + `mgmt_server_id` bigint(20) unsigned DEFAULT NULL COMMENT 'management server id that is responsible for the HA for the resource', + PRIMARY KEY (`id`), + KEY `i_ha_config__enabled` (`enabled`), + KEY `i_ha_config__ha_state` (`ha_state`), + KEY `i_ha_config__mgmt_server_id` (`mgmt_server_id`), + UNIQUE KEY (`resource_id`, `resource_type`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8; + +DELETE from `cloud`.`configuration` where name='outofbandmanagement.sync.interval'; + +-- Host HA changes: +DROP VIEW IF EXISTS `cloud`.`host_view`; +CREATE VIEW `cloud`.`host_view` AS + select + host.id, + host.uuid, + host.name, + host.status, + host.disconnected, + host.type, + host.private_ip_address, + host.version, + host.hypervisor_type, + host.hypervisor_version, + host.capabilities, + host.last_ping, + host.created, + host.removed, + host.resource_state, + host.mgmt_server_id, + host.cpu_sockets, + host.cpus, + host.speed, + host.ram, + cluster.id cluster_id, + cluster.uuid cluster_uuid, + cluster.name cluster_name, + cluster.cluster_type, + data_center.id data_center_id, + data_center.uuid data_center_uuid, + data_center.name data_center_name, + data_center.networktype data_center_type, + host_pod_ref.id pod_id, + host_pod_ref.uuid pod_uuid, + host_pod_ref.name pod_name, + host_tags.tag, + guest_os_category.id 
guest_os_category_id, + guest_os_category.uuid guest_os_category_uuid, + guest_os_category.name guest_os_category_name, + mem_caps.used_capacity memory_used_capacity, + mem_caps.reserved_capacity memory_reserved_capacity, + cpu_caps.used_capacity cpu_used_capacity, + cpu_caps.reserved_capacity cpu_reserved_capacity, + async_job.id job_id, + async_job.uuid job_uuid, + async_job.job_status job_status, + async_job.account_id job_account_id, + oobm.enabled AS `oobm_enabled`, + oobm.power_state AS `oobm_power_state`, + ha_config.enabled AS `ha_enabled`, + ha_config.ha_state AS `ha_state`, + ha_config.provider AS `ha_provider` + from + `cloud`.`host` + left join + `cloud`.`cluster` ON host.cluster_id = cluster.id + left join + `cloud`.`data_center` ON host.data_center_id = data_center.id + left join + `cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id + left join + `cloud`.`host_details` ON host.id = host_details.host_id + and host_details.name = 'guest.os.category.id' + left join + `cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED) + left join + `cloud`.`host_tags` ON host_tags.host_id = host.id + left join + `cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id + and mem_caps.capacity_type = 0 + left join + `cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id + and cpu_caps.capacity_type = 1 + left join + `cloud`.`async_job` ON async_job.instance_id = host.id + and async_job.instance_type = 'Host' + and async_job.job_status = 0 + left join + `cloud`.`oobm` ON oobm.host_id = host.id + left join + `cloud`.`ha_config` ON ha_config.resource_id=host.id + and ha_config.resource_type='Host'; diff --git a/setup/db/db/schema-481to490-cleanup.sql b/setup/db/db/schema-481to490-cleanup.sql index 0ab02d83089..adf566e07fe 100644 --- a/setup/db/db/schema-481to490-cleanup.sql +++ b/setup/db/db/schema-481to490-cleanup.sql @@ -272,79 +272,3 @@ CREATE VIEW `cloud`.`user_view` AS and async_job.instance_type = 'User' and async_job.job_status = 0; --- Out-of-band management -DROP VIEW IF EXISTS `cloud`.`host_view`; -CREATE VIEW `cloud`.`host_view` AS - select - host.id, - host.uuid, - host.name, - host.status, - host.disconnected, - host.type, - host.private_ip_address, - host.version, - host.hypervisor_type, - host.hypervisor_version, - host.capabilities, - host.last_ping, - host.created, - host.removed, - host.resource_state, - host.mgmt_server_id, - host.cpu_sockets, - host.cpus, - host.speed, - host.ram, - cluster.id cluster_id, - cluster.uuid cluster_uuid, - cluster.name cluster_name, - cluster.cluster_type, - data_center.id data_center_id, - data_center.uuid data_center_uuid, - data_center.name data_center_name, - data_center.networktype data_center_type, - host_pod_ref.id pod_id, - host_pod_ref.uuid pod_uuid, - host_pod_ref.name pod_name, - host_tags.tag, - guest_os_category.id guest_os_category_id, - guest_os_category.uuid guest_os_category_uuid, - guest_os_category.name guest_os_category_name, - mem_caps.used_capacity memory_used_capacity, - mem_caps.reserved_capacity memory_reserved_capacity, - cpu_caps.used_capacity cpu_used_capacity, - cpu_caps.reserved_capacity cpu_reserved_capacity, - async_job.id job_id, - async_job.uuid job_uuid, - async_job.job_status job_status, - async_job.account_id job_account_id, - oobm.enabled AS `oobm_enabled`, - oobm.power_state AS `oobm_power_state` - from - `cloud`.`host` - left join - `cloud`.`cluster` ON host.cluster_id = cluster.id - left join - `cloud`.`data_center` ON host.data_center_id = 
data_center.id - left join - `cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id - left join - `cloud`.`host_details` ON host.id = host_details.host_id - and host_details.name = 'guest.os.category.id' - left join - `cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED) - left join - `cloud`.`host_tags` ON host_tags.host_id = host.id - left join - `cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id - and mem_caps.capacity_type = 0 - left join - `cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id - and cpu_caps.capacity_type = 1 - left join - `cloud`.`async_job` ON async_job.instance_id = host.id - and async_job.instance_type = 'Host' - and async_job.job_status = 0 - left join - `cloud`.`oobm` ON oobm.host_id = host.id; diff --git a/setup/db/db/schema-481to490.sql b/setup/db/db/schema-481to490.sql index 7f33043146e..49cfc8346c5 100644 --- a/setup/db/db/schema-481to490.sql +++ b/setup/db/db/schema-481to490.sql @@ -495,6 +495,83 @@ CREATE TABLE IF NOT EXISTS `cloud`.`oobm` ( CONSTRAINT `fk_oobm__host_id` FOREIGN KEY (`host_id`) REFERENCES `host` (`id`) ON DELETE CASCADE ) ENGINE=InnoDB DEFAULT CHARSET=utf8; +-- Out-of-band management +DROP VIEW IF EXISTS `cloud`.`host_view`; +CREATE VIEW `cloud`.`host_view` AS + select + host.id, + host.uuid, + host.name, + host.status, + host.disconnected, + host.type, + host.private_ip_address, + host.version, + host.hypervisor_type, + host.hypervisor_version, + host.capabilities, + host.last_ping, + host.created, + host.removed, + host.resource_state, + host.mgmt_server_id, + host.cpu_sockets, + host.cpus, + host.speed, + host.ram, + cluster.id cluster_id, + cluster.uuid cluster_uuid, + cluster.name cluster_name, + cluster.cluster_type, + data_center.id data_center_id, + data_center.uuid data_center_uuid, + data_center.name data_center_name, + data_center.networktype data_center_type, + host_pod_ref.id pod_id, + host_pod_ref.uuid pod_uuid, + host_pod_ref.name pod_name, + host_tags.tag, + guest_os_category.id guest_os_category_id, + guest_os_category.uuid guest_os_category_uuid, + guest_os_category.name guest_os_category_name, + mem_caps.used_capacity memory_used_capacity, + mem_caps.reserved_capacity memory_reserved_capacity, + cpu_caps.used_capacity cpu_used_capacity, + cpu_caps.reserved_capacity cpu_reserved_capacity, + async_job.id job_id, + async_job.uuid job_uuid, + async_job.job_status job_status, + async_job.account_id job_account_id, + oobm.enabled AS `oobm_enabled`, + oobm.power_state AS `oobm_power_state` + from + `cloud`.`host` + left join + `cloud`.`cluster` ON host.cluster_id = cluster.id + left join + `cloud`.`data_center` ON host.data_center_id = data_center.id + left join + `cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id + left join + `cloud`.`host_details` ON host.id = host_details.host_id + and host_details.name = 'guest.os.category.id' + left join + `cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED) + left join + `cloud`.`host_tags` ON host_tags.host_id = host.id + left join + `cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id + and mem_caps.capacity_type = 0 + left join + `cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id + and cpu_caps.capacity_type = 1 + left join + `cloud`.`async_job` ON async_job.instance_id = host.id + and async_job.instance_type = 'Host' + and async_job.job_status = 0 + left join + `cloud`.`oobm` ON oobm.host_id = host.id; + INSERT IGNORE INTO 
`cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '4.0', 'centosGuest', 171, now(), 0); INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '4.1', 'centosGuest', 171, now(), 0); INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.0', 'centosGuest', 171, now(), 0); @@ -545,4 +622,3 @@ INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervis INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.0', 'centos64Guest', 228, now(), 0); INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.1', 'centos64Guest', 228, now(), 0); INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.5', 'centos64Guest', 228, now(), 0); - diff --git a/setup/db/db/schema-4930to41000.sql b/setup/db/db/schema-4930to41000.sql index edc9d603534..db4e8605580 100644 --- a/setup/db/db/schema-4930to41000.sql +++ b/setup/db/db/schema-4930to41000.sql @@ -277,4 +277,4 @@ CREATE TABLE `cloud`.`external_netscaler_controlcenter` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; ALTER TABLE `cloud`.`sslcerts` ADD COLUMN `name` varchar(255) NULL default NULL COMMENT 'Name of the Certificate'; -ALTER TABLE `cloud`.`network_offerings` ADD COLUMN `service_package_id` varchar(255) NULL default NULL COMMENT 'Netscaler ControlCenter Service Package'; \ No newline at end of file +ALTER TABLE `cloud`.`network_offerings` ADD COLUMN `service_package_id` varchar(255) NULL default NULL COMMENT 'Netscaler ControlCenter Service Package'; diff --git a/test/integration/component/test_host_ha.py b/test/integration/component/test_host_ha.py index 6361564e816..2af5ea93eb7 100644 --- a/test/integration/component/test_host_ha.py +++ b/test/integration/component/test_host_ha.py @@ -84,7 +84,7 @@ class TestHostHA(cloudstackTestCase): "sleep": 60, "timeout": 10, } - + def tearDown(self): try: @@ -95,20 +95,20 @@ class TestHostHA(cloudstackTestCase): raise Exception("Warning: Exception during cleanup : %s" % e) return - + + def createVMs(self, hostId, number, local): - self.template = get_template( self.apiclient, self.zone.id, self.services["ostype"] ) - + if self.template == FAILED: assert False, "get_template() failed to return template with description %s" % self.services["ostype"] - + self.logger.debug("Using template %s " % self.template.id) - + if local: self.service_offering = ServiceOffering.create( self.apiclient, @@ -119,10 +119,9 @@ class TestHostHA(cloudstackTestCase): self.apiclient, self.services["service_offering"] ) - - + self.logger.debug("Using service offering %s " % self.service_offering.id) - + vms = [] for i in range(0, number): self.services["vm"]["zoneid"] = self.zone.id @@ -139,7 +138,7 @@ class TestHostHA(cloudstackTestCase): self.cleanup.append(vm) self.logger.debug("VM create = {}".format(vm.id)) return vm - + def noOfVMsOnHost(self, hostId): listVms = VirtualMachine.list( self.apiclient, @@ -150,12 +149,12 @@ class 
TestHostHA(cloudstackTestCase): for vm in listVms: self.logger.debug('VirtualMachine on Hyp 1 = {}'.format(vm.id)) vmnos = vmnos + 1 - + return vmnos - + def checkHostDown(self, fromHostIp, testHostIp): try: - ssh = SshClient(fromHostIp, 22, "root", "password") + ssh = SshClient(fromHostIp, 22, "root", "password") res = ssh.execute("ping -c 1 %s" % testHostIp) result = str(res) if result.count("100% packet loss") == 1: @@ -165,10 +164,10 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - + def checkHostUp(self, fromHostIp, testHostIp): try: - ssh = SshClient(fromHostIp, 22, "root", "password") + ssh = SshClient(fromHostIp, 22, "root", "password") res = ssh.execute("ping -c 1 %s" % testHostIp) result = str(res) if result.count(" 0% packet loss") == 1: @@ -178,8 +177,8 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - - + + def isOnlyNFSStorageAvailable(self): if self.zone.localstorageenabled: return False @@ -196,13 +195,13 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return True - + return False - + def isOnlyLocalStorageAvailable(self): if not(self.zone.localstorageenabled): return False - + storage_pools = StoragePool.list( self.apiclient, zoneid=self.zone.id, @@ -216,13 +215,13 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return False - + return True - + def isLocalAndNFSStorageAvailable(self): if not(self.zone.localstorageenabled): return False - + storage_pools = StoragePool.list( self.apiclient, zoneid=self.zone.id, @@ -236,10 +235,10 @@ class TestHostHA(cloudstackTestCase): for storage_pool in storage_pools: if storage_pool.type == u'NetworkFilesystem': return True - + return False - - + + def checkHostStateInCloudstack(self, state, hostId): try: listHost = Host.list( @@ -254,7 +253,7 @@ class TestHostHA(cloudstackTestCase): True, "Check if listHost returns a valid response" ) - + self.assertEqual( len(listHost), 1, @@ -268,19 +267,30 @@ class TestHostHA(cloudstackTestCase): except Exception as e: self.logger.debug("Got exception %s" % e) return False, 1 - - + + def disconnectHostfromNetwork(self, hostIp, timeout): srcFile = os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh" if not(os.path.isfile(srcFile)): self.logger.debug("File %s not found" % srcFile) raise unittest.SkipTest("Script file %s required for HA not found" % srcFile); - + ssh = SshClient(hostIp, 22, "root", "password") ssh.scp(srcFile, "/root/test_host_ha.sh") - ssh.execute("nohup sh /root/test_host_ha.sh %s > /dev/null 2>&1 &\n" % timeout) + ssh.execute("nohup sh /root/test_host_ha.sh -t %s -d all > /dev/null 2>&1 &\n" % timeout) return - + + def stopAgentOnHost(self, hostIp, timeout): + srcFile = os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh" + if not(os.path.isfile(srcFile)): + self.logger.debug("File %s not found" % srcFile) + raise unittest.SkipTest("Script file %s required for HA not found" % srcFile); + + ssh = SshClient(hostIp, 22, "root", "password") + ssh.scp(srcFile, "/root/test_host_ha.sh") + ssh.execute("nohup sh /root/test_host_ha.sh -t %s -d agent > /dev/null 2>&1 &\n" % timeout) + return + @attr( tags=[ @@ -292,11 +302,13 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_01_host_ha_with_nfs_storagepool_with_vm(self): - + raise 
unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + if not(self.isOnlyNFSStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for NFS store only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -305,61 +317,61 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) - - + + if no_of_vms < 5: self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - + return - + @attr( tags=[ "advanced", @@ -370,11 +382,12 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_02_host_ha_with_local_storage_and_nfs(self): - + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return if not(self.isLocalAndNFSStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for Local storage and NFS storage only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -383,62 +396,62 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of 
VMS on hosts = %s" % no_of_vms) - - + + if no_of_vms < 5: self.logger.debug("test_02: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - + return - - - + + + @attr( tags=[ "advanced", @@ -449,11 +462,13 @@ class TestHostHA(cloudstackTestCase): "sg"], required_hardware="true") def test_03_host_ha_with_only_local_storage(self): - + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + if not(self.isOnlyLocalStorageAvailable()): raise unittest.SkipTest("Skipping this test as this is for Local storage only."); return - + listHost = Host.list( self.apiclient, type='Routing', @@ -462,55 +477,125 @@ class TestHostHA(cloudstackTestCase): ) for host in listHost: self.logger.debug('Hypervisor = {}'.format(host.id)) - - + + if len(listHost) != 2: self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); return - + no_of_vms = self.noOfVMsOnHost(listHost[0].id) - + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) - + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) - + if no_of_vms < 5: self.logger.debug("test_03: Create VMs as there are not enough vms to check host ha") no_vm_req = 5 - no_of_vms if (no_vm_req > 0): self.logger.debug("Creating vms = {}".format(no_vm_req)) self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True) - + ha_host = listHost[1] other_host = listHost[0] if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): ha_host = listHost[0] other_host = listHost[1] - + self.disconnectHostfromNetwork(ha_host.ipaddress, 400) - + hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress) - if not(hostDown): + if not(hostDown): raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress)) - + 
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Alert", ha_host.id) #the test could have failed here but we will try our best to get host back in consistent state - + no_of_vms = self.noOfVMsOnHost(ha_host.id) no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) # hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress) - if not(hostUp): + if not(hostUp): self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress)) - - + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) - - if not(hostDownInCloudstack): + + if not(hostDownInCloudstack): raise self.fail("Host is not in alert %s, in cloudstack so failing test " % (ha_host.ipaddress)) - if not(hostUpInCloudstack): + if not(hostUpInCloudstack): raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) - - return \ No newline at end of file + + return + + + @attr( + tags=[ + "advanced", + "advancedns", + "smoke", + "basic", + "eip", + "sg"], + required_hardware="true") + def test_04_host_ha_vmactivity_check(self): + + if not(self.isOnlyNFSStorageAvailable()): + raise unittest.SkipTest("Skipping this test as this is for NFS store only."); + return + + listHost = Host.list( + self.apiclient, + type='Routing', + zoneid=self.zone.id, + podid=self.pod.id, + ) + for host in listHost: + self.logger.debug('Hypervisor = {}'.format(host.id)) + + + if len(listHost) != 2: + self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost)); + raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost)); + return + + no_of_vms = self.noOfVMsOnHost(listHost[0].id) + + no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id) + + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) + + + if no_of_vms < 5: + self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha") + no_vm_req = 5 - no_of_vms + if (no_vm_req > 0): + self.logger.debug("Creating vms = {}".format(no_vm_req)) + self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False) + + ha_host = listHost[1] + other_host = listHost[0] + if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id): + ha_host = listHost[0] + other_host = listHost[1] + + self.stopAgentOnHost(ha_host.ipaddress, 150) + + hostDisconnectedInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Disconnected", ha_host.id) + #the test could have failed here but we will try our best to get host back in consistent state + + no_of_vms = self.noOfVMsOnHost(ha_host.id) + no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id) + self.logger.debug("Number of VMS on hosts = %s" % no_of_vms) + # + + hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id) + + if not(hostDisconnectedInCloudstack): + raise self.fail("Host is not disconnected %s, in cloudstack so failing test " % (ha_host.ipaddress)) + if not(hostUpInCloudstack): + raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress)) + + return diff --git a/test/integration/component/test_host_ha.sh b/test/integration/component/test_host_ha.sh index 85aadb1b688..b27038840c9 100755 --- a/test/integration/component/test_host_ha.sh +++ b/test/integration/component/test_host_ha.sh @@ -1,40 +1,100 @@ #!/bin/bash +# Licensed to the Apache Software Foundation (ASF) 
under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. -#bring down all eth interfaces +set -x -usage() { echo "Usage: $0 "; exit 1; } +usage() { echo "Usage: $0 -d -t "; exit 1; } -case $1 in +Interval= +Down= +while getopts 'd:t:' OPTION +do + case $OPTION in + d) + Down="$OPTARG" + ;; + t) + Interval="$OPTARG" + ;; + *) + usage + ;; + esac +done + + +if [ -z $Interval ]; then + usage +fi + + +if [ "$Down" != 'all' ]; then + if [ "$Down" != 'agent' ]; then + usage + fi +fi + +case $Interval in ''|*[!0-9]*) echo "The parameter should be an integer"; exit ;; *) echo $1 ;; esac -if [ -z $1 ]; then - usage -elif [ $1 -lt 1 ]; then +if [ $Interval -lt 1 ]; then echo "Down time should be at least 1 second" exit 1 -elif [ $1 -gt 5000 ]; then +elif [ $Interval -gt 5000 ]; then echo "Down time should be less than 5000 second" exit 1 fi -for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` + +for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"` do - ifconfig $i down + ifconfig $i down done service cloudstack-agent stop update-rc.d -f cloudstack-agent remove -sleep $1 +sleep 1 -for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` +if [ "$Down" = 'agent' ]; then + for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"` + do + ifconfig $i up + done +fi + +counter=$Interval +while [ $counter -gt 0 ] do - ifconfig $i up + sleep 1 + counter=$(( $counter - 1 )) done +if [ "$Down" = 'all' ]; then + for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth` + do + ifconfig $i up + done +fi + update-rc.d -f cloudstack-agent defaults -service cloudstack-agent start \ No newline at end of file +service cloudstack-agent start diff --git a/test/integration/smoke/test_ha_for_host.py b/test/integration/smoke/test_ha_for_host.py new file mode 100644 index 00000000000..efc4f1f1b41 --- /dev/null +++ b/test/integration/smoke/test_ha_for_host.py @@ -0,0 +1,247 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
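The smoke test that follows exercises the new host-HA API end to end. For orientation, the call flow it drives boils down to the sequence sketched below; this assumes a Marvin apiclient and a Routing host object (exactly what the test obtains in setUp() and getHost()) and imports the generated command modules explicitly rather than via the wildcard import the test uses.

from marvin.cloudstackAPI import (configureHAForHost, disableHAForHost,
                                  enableHAForHost, listHostHAProviders,
                                  listHostHAResources)

def exercise_host_ha(apiclient, host):
    # Pick the HA provider that matches the host's hypervisor.
    cmd = listHostHAProviders.listHostHAProvidersCmd()
    cmd.hypervisor = host.hypervisor
    provider = apiclient.listHostHAProviders(cmd)[0].haprovider

    # Configure the provider for the host, then enable HA on it.
    cmd = configureHAForHost.configureHAForHostCmd()
    cmd.hostid = host.id
    cmd.provider = provider
    apiclient.configureHAForHost(cmd)

    cmd = enableHAForHost.enableHAForHostCmd()
    cmd.hostid = host.id
    assert apiclient.enableHAForHost(cmd).haenable == True

    # The host now appears among the HA-managed resources.
    cmd = listHostHAResources.listHostHAResourcesCmd()
    cmd.hostid = host.id
    assert apiclient.listHostHAResources(cmd)[0].hostid == host.id

    # Disable HA again; the host's ha_state goes back to Disabled.
    cmd = disableHAForHost.disableHAForHostCmd()
    cmd.hostid = host.id
    assert apiclient.disableHAForHost(cmd).haenable == False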
+ + +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.common import * +from nose.plugins.attrib import attr +import cmd +from cmd import Cmd + + +class TestHaForHost(cloudstackTestCase): + """ Test cases for configuring HA for Host + """ + + def setUp(self): + testClient = super(TestHaForHost, self).getClsTestClient() + + self.apiclient = testClient.getApiClient() + self.dbclient = testClient.getDbConnection() + self.services = testClient.getParsedTestDataConfig() + + self.zone = get_zone(self.apiclient, testClient.getZoneForTests()) + self.host = None + self.server = None + + self.cleanup = [] + + def tearDown(self): + try: + self.dbclient.execute("delete from ha_config where resource_type='Host'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + + def getHost(self, hostId=None): + if self.host and hostId is None: + return self.host + + response = list_hosts( + self.apiclient, + zoneid=self.zone.id, + type='Routing', + id=hostId + ) + if len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No hosts found, skipping HA for Host test") + + + def getHaProvider(self, host): + cmd = listHostHAProviders.listHostHAProvidersCmd() + cmd.hypervisor = host.hypervisor + response = self.apiclient.listHostHAProviders(cmd) + return response[0].haprovider + + + def configureHaProvider(self): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.hostid = self.getHost().id + cmd.provider = self.getHaProvider(self.getHost()) + return self.apiclient.configureHAForHost(cmd) + + + def getHaForHostEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + + def getHaForHostDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + + def getListHostHAResources(self): + cmd = listHostHAResources.listHostHAResourcesCmd() + cmd.hostid = self.getHost().id + return cmd + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_enable_ha_for_host(self): + """ + This test enables HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_enable_ha_for_host_invalid(self): + """ + This is a negative test for enable HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostEnableCmd() + cmd.hostid = -1 + + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_disable_ha_for_host(self): + """ + This test disables HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostDisableCmd() + + response = self.apiclient.disableHAForHost(cmd) + + self.assertTrue(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(cmd.hostid) + + self.assertEqual(response.hostha.hastate, "Disabled") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def 
test_disable_ha_for_host_invalid(self): + """ + This is a negative test for disable HA for a host + """ + + self.configureHaProvider() + cmd = self.getHaForHostDisableCmd() + cmd.hostid = -1 + + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_list_ha_for_host(self): + """ + Test that verifies the listHAForHost API + """ + self.configureHaProvider() + db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config") + + cmd = self.getListHostHAResources() + del cmd.hostid + response = self.apiclient.listHostHAResources(cmd) + + self.assertEqual(db_count[0][0], len(response)) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_list_ha_for_host_valid(self): + """ + Valid test for listing a specific host HA resources + """ + + self.configureHaProvider() + cmd = self.getListHostHAResources() + response = self.apiclient.listHostHAResources(cmd) + self.assertEqual(response[0].hostid, cmd.hostid) + + + @attr(tags=["advanced", + "advancedns", + "smoke", + "basic", + "sg"], + required_hardware="false") + def test_list_ha_for_host_invalid(self): + """ + Test that listHostHAResources is returning exception when called with invalid data + """ + + self.configureHaProvider() + cmd = self.getListHostHAResources() + cmd.hostid = "someinvalidvalue" + + try: + response = self.apiclient.listHostHAResources(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") diff --git a/test/integration/smoke/test_ha_kvm.py b/test/integration/smoke/test_ha_kvm.py new file mode 100644 index 00000000000..7709adc4859 --- /dev/null +++ b/test/integration/smoke/test_ha_kvm.py @@ -0,0 +1,700 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
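Several cases in the KVM/simulator suite below (check_host_transition_to_available, checkSyncToState) rely on the same pattern: poll the host's reported hastate until it reaches the expected value or a deadline expires. Here is a generic, dependency-free sketch of that pattern; get_ha_state is a hypothetical stand-in for the list_hosts lookup the tests actually perform.

import time

def wait_for_ha_state(get_ha_state, expected, timeout=90, interval=5):
    """Poll get_ha_state() until it returns `expected`; give up after `timeout` seconds."""
    deadline = time.time() + timeout
    while time.time() < deadline:
        if get_ha_state() == expected:
            return True
        time.sleep(interval)
    return False

if __name__ == '__main__':
    # Simulated sequence a freshly enabled host might report before converging.
    states = iter(['Ineligible', 'Ineligible', 'Available'])
    assert wait_for_ha_state(lambda: next(states), 'Available', timeout=5, interval=0)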
+ +import marvin +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.base import * +from marvin.lib.common import * +from nose.plugins.attrib import attr + +import random + +from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer + +import random +import socket +import sys +import thread +import time + + +class TestHAKVM(cloudstackTestCase): + """ Test cases for host HA using KVM host(s) + """ + + def setUp(self): + self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() + self.dbclient = self.testClient.getDbConnection() + self.services = self.testClient.getParsedTestDataConfig() + self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + + # Cleanup any existing configs + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.host = self.getHost() + + # use random port for ipmisim + s = socket.socket() + s.bind(('', 0)) + self.serverPort = s.getsockname()[1] + s.close() + + self.cleanup = [] + + def getFakeMsId(self): + return self.fakeMsId + + def getFakeMsRunId(self): + return self.fakeMsId * 1000 + + def tearDown(self): + try: + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort()) + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='outOfBandManagementEnabled'") + self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + def getHostHaEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def check_host_transition_to_available(self): + t_end = time.time() + 90 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate == "Available": + return + else: + continue + self.fail(self) + + def getHost(self): + response = list_hosts( + self.apiclient, + type='Routing', + resourcestate='Enabled' + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHost(self, hostId=None): + + response = list_hosts( + self.apiclient, + type='Routing', + id=hostId + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHostHaConfigCmd(self, provider='kvmhaprovider'): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.provider = provider + cmd.hostid = self.getHost().id + return cmd + + def getHostHaEnableCmd(self): + cmd = 
enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getHostHaDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def configureAndEnableHostHa(self, initialize=True): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) + self.assertEqual(response.haenable, True) + if initialize: + self.configureKVMHAProviderState(True, True, True, False) + + def configureAndDisableHostHa(self, hostId): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + cmd.hostid = hostId + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + def enableHostHa(self, hostId): + cmd = self.getHostHaEnableCmd() + cmd.hostid = hostId + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + return response + + def configureKVMHAProviderState(self, health, activity, recover, fence): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.hostid = self.getHost().id + cmd.health = health + cmd.activity = activity + cmd.recover = recover + cmd.fence = fence + response = self.apiclient.configureKVMHAProviderState(cmd) + self.assertEqual(response.success, 'true') + + def checkSyncToState(self, state, interval=5000): + def checkForStateSync(expectedState): + response = self.getHost(hostId=self.getHost().id).hostha + return response.hastate == expectedState, None + + sync_interval = 1 + int(interval) / 1000 + res, _ = wait_until(sync_interval, 10, checkForStateSync, state) + if not res: + self.fail("Failed to get host.hastate synced to expected state:" + state) + response = self.getHost(hostId=self.getHost().id).hostha + self.assertEqual(response.hastate, state) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_invalid_provider(self): + """ + Tests host-ha configuration with invalid driver + """ + cmd = self.getHostHaConfigCmd() + cmd.provider = 'randomDriverThatDoesNotExist' + try: + response = self.apiclient.configureHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_enable_feature_invalid(self): + """ + Tests ha feature enable command with invalid options + """ + cmd = self.getHostHaEnableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = enableHAForCluster.enableHAForClusterCmd() + response = self.apiclient.enableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = enableHAForZone.enableHAForZoneCmd() + response = self.apiclient.enableHAForZone(cmd) + except 
Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_disable_feature_invalid(self): + """ + Tests ha feature disable command with invalid options + """ + cmd = self.getHostHaDisableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForCluster.disableHAForClusterCmd() + response = self.apiclient.disableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForZone.disableHAForZoneCmd() + response = self.apiclient.disableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_feature_valid(self): + """ + Tests host-ha enable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_disable_feature_valid(self): + """ + Tests host-ha disable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, 'Disabled') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_configure_ha_provider_invalid(self): + """ + Tests configure HA Provider with invalid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + # Setup wrong configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + try: + self.apiclient.configureHAForHost(conf_ha_cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_configure_ha_provider_valid(self): + """ + Tests configure HA Provider with valid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + # Setup configuration for the 
host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + response = self.apiclient.configureHAForHost(conf_ha_cmd) + + # Check the response contains the set provider and hostID + self.assertEqual(response.haprovider, conf_ha_cmd.provider) + self.assertEqual(response.hostid, conf_ha_cmd.hostid) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_disable_oobm_ha_state_ineligible(self): + """ + Tests that when HA is enabled for a host, if oobm is disabled HA State should turn into Ineligible + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + # Disable OOBM + self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd()) + oobm_cmd = self.getOobmDisableCmd() + oobm_cmd.hostid = cmd.hostid + response = self.apiclient.disableOutOfBandManagementForHost(oobm_cmd) + self.assertEqual(response.hostid, oobm_cmd.hostid) + self.assertEqual(response.enabled, False) + + response = self.getHost(hostId=cmd.hostid).outofbandmanagement + self.assertEqual(response.powerstate, 'Disabled') + + # Verify HA State is Ineligeble + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, "Ineligible") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_enable_ha_when_host_powerstate_on(self): + """ + Tests that when HA is enabled for a host, if oobm state is on HA State should turn into Available + """ + + self.configureAndStartIpmiServer() + + self.assertIssueCommandState('ON', 'On') + + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + # Verify HA State is Available + self.check_host_transition_to_available() + + response = self.getHost() + if response.hostha.hastate is not "Available": + print response + + self.assertEqual(response.hostha.hastate, "Available") + + self.stopIpmiServer() + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_feature_without_setting_provider(self): + """ + Tests Enable HA without setting the provider, Exception is thrown + """ + host = self.get_non_configured_ha_host() + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + + try: + self.apiclient.enableHAForHost(cmd) + except Exception as e: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], 
required_hardware="true") + def test_hostha_enable_ha_when_host_disabled(self): + """ + Tests Enable HA when host is disabled, should be Ineligible + """ + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = self.host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Disable Host + self.disableHost(self.host.id) + + # Check HA State + try: + response = self.getHost(self.host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.enableHost(self.host.id) + self.fail(e) + + # Enable Host + self.enableHost(self.host.id) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_ha_when_host_inMaintenance(self): + """ + Tests Enable HA when host is in Maintenance mode, should be Ineligible + """ + + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Prepare Host for maintenance + self.setHostToMaintanance(host.id) + + # Check HA State + try: + response = self.getHost(host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.cancelMaintenance(host.id) + self.fail(e) + + # Cancel maintenance on Host + self.cancelMaintenance(host.id) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_hostha_enable_ha_when_host_disconnected(self): + """ + Tests Enable HA when host is disconnected, should be Ineligible + """ + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + # Make Host Disconnected + self.killAgent() + + # Check HA State + try: + time.sleep(1) + response = self.getHost(self.host.id) + self.assertEqual(response.hostha.hastate, "Ineligible") + except Exception as e: + self.startAgent() + self.fail(e) + + # Restart agent on Host + self.startAgent() + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_remove_ha_provider_not_possible(self): + """ + Tests that the HA Provider cannot be removed while HA is enabled + """ + + host = self.getHost() + + # Enable HA + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + enable = self.apiclient.enableHAForHost(cmd) + self.assertEqual(enable.hostid, cmd.hostid) + self.assertEqual(enable.haenable, True) + + try: + self.apiclient.configureHAForHost(self.getHostHaConfigCmd('')) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + def configureAndStartIpmiServer(self, power_state=None): + """ + Setup ipmisim and enable out-of-band management for host + """ + self.configureAndEnableOobm() + self.startIpmiServer() + if power_state: + bmc = IpmiServerContext().bmc + bmc.powerstate = power_state + + def assertIssueCommandState(self, command, expected): + """ + Asserts power action result for a given power command + """ + if command != 'STATUS': + 
self.issuePowerActionCmd(command) + response = self.issuePowerActionCmd('STATUS') + self.assertEqual(response.powerstate, expected) + + def configureAndEnableOobm(self): + self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd()) + response = self.apiclient.enableOutOfBandManagementForHost(self.getOobmEnableCmd()) + self.assertEqual(response.enabled, True) + + def startIpmiServer(self): + def startIpmiServer(tname, server): + self.debug("Starting ipmisim server") + try: + server.serve_forever() + except Exception: pass + IpmiServerContext('reset') + ThreadedIpmiServer.allow_reuse_address = False + server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer) + thread.start_new_thread(startIpmiServer, ("ipmi-server", server,)) + self.server = server + + def stopIpmiServer(self): + if self.server: + self.server.shutdown() + self.server.server_close() + + def getOobmIssueActionCmd(self): + cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd() + cmd.hostid = self.getHost().id + cmd.action = 'STATUS' + return cmd + + def issuePowerActionCmd(self, action, timeout=None): + cmd = self.getOobmIssueActionCmd() + cmd.action = action + if timeout: + cmd.timeout = timeout + + try: + return self.apiclient.issueOutOfBandManagementPowerAction(cmd) + except Exception as e: + if "packet session id 0x0 does not match active session" in str(e): + raise self.skipTest("Known ipmitool issue hit, skipping test") + raise e + + def getOobmEnableCmd(self): + cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getOobmDisableCmd(self): + cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getIpmiServerPort(self): + return self.serverPort + + def getOobmConfigCmd(self): + cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd() + cmd.driver = 'ipmitool' # The default available driver + cmd.address = self.getIpmiServerIp() + cmd.port = self.getIpmiServerPort() + cmd.username = 'admin' + cmd.password = 'password' + cmd.hostid = self.getHost().id + return cmd + + def getIpmiServerIp(self): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"])) + return s.getsockname()[0] + + def get_non_configured_ha_host(self): + + response = list_hosts( + self.apiclient, + type='Routing' + ) + + for host in response: + if host.haprovider is None: + return host + else: + cloudstackTestCase.skipTest(self, "There is no non configured hosts. 
Skipping test.") + + raise self.skipTest("No KVM hosts found, skipping host-ha test") + + def getHAState(self, id): + cmd = listHostHAResources.listHostHAResourcesCmd() + cmd.hostid = id + response = self.apiclient.listHostHAResources(cmd) + + return response[0] + + def startAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("service cloudstack-agent start") + + def disableHost(self, id): + + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Disable" + + response = self.apiclient.updateHost(cmd) + + self.assertEqual(response.resourcestate, "Disabled") + + def enableHost(self, id): + cmd = updateHost.updateHostCmd() + cmd.id = id + cmd.allocationstate = "Enable" + + response = self.apiclient.updateHost(cmd) + + self.assertEqual(response.resourcestate, "Enabled") + + def setHostToMaintanance(self, id): + cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd() + cmd.id = id + + response = self.apiclient.prepareHostForMaintenance(cmd) + + self.assertEqual(response.resourcestate, "PrepareForMaintenance") + + def cancelMaintenance(self, id): + cmd = cancelHostMaintenance.cancelHostMaintenanceCmd() + cmd.id = id + + response = self.apiclient.cancelHostMaintenance(cmd) + + self.assertEqual(response.resourcestate, "Enabled") + + def killAgent(self): + host = self.getHost() + SshClient(host=host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") diff --git a/test/integration/smoke/test_ha_kvm_agent.py b/test/integration/smoke/test_ha_kvm_agent.py new file mode 100644 index 00000000000..3efde0a97ec --- /dev/null +++ b/test/integration/smoke/test_ha_kvm_agent.py @@ -0,0 +1,535 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ + +from marvin.cloudstackTestCase import * +from marvin.lib.utils import * +from marvin.lib.base import * +from marvin.lib.common import * +from nose.plugins.attrib import attr + +from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer + +import random +import socket +import thread + + +class TestHaKVMAgent(cloudstackTestCase): + """ Test cases for out of band management + """ + + def setUp(self): + testClient = super(TestHaKVMAgent, self).getClsTestClient() + + self.apiClient = testClient.getApiClient() + self.dbclient = testClient.getDbConnection() + self.services = testClient.getParsedTestDataConfig() + + self.zone = get_zone(self.apiClient, testClient.getZoneForTests()) + self.host = self.getHost() + self.cluster_id = self.host.clusterid + self.server = None + + self.hypervisor = self.testClient.getHypervisorInfo() + self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ + self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__ + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + + # Cleanup any existing configs + self.dbclient.execute("delete from ha_config where resource_type='Host'") + + # use random port for ipmisim + s = socket.socket() + s.bind(('', 0)) + self.serverPort = s.getsockname()[1] + s.close() + + # Set Cluster-level setting in order to run tests faster + self.update_configuration("kvm.ha.activity.check.failure.ratio", "0.7") + self.update_configuration("kvm.ha.activity.check.interval", "10") + self.update_configuration("kvm.ha.activity.check.max.attempts", "5") + self.update_configuration("kvm.ha.activity.check.timeout", "60") + self.update_configuration("kvm.ha.degraded.max.period", "30") + self.update_configuration("kvm.ha.fence.timeout", "60") + self.update_configuration("kvm.ha.health.check.timeout", "10") + self.update_configuration("kvm.ha.recover.failure.threshold", "1") + self.update_configuration("kvm.ha.recover.timeout", "120") + self.update_configuration("kvm.ha.recover.wait.period", "60") + + self.service_offering = ServiceOffering.create( + self.apiClient, + self.services["service_offerings"] + ) + + self.template = get_template( + self.apiClient, + self.zone.id, + self.services["ostype"] + ) + + self.cleanup = [self.service_offering] + + def tearDown(self): + try: + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort()) + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='outOfBandManagementEnabled'") + self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'") + cleanup_resources(self.apiClient, self.cleanup) + if self.server: + self.server.shutdown() + self.server.server_close() + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + def getFakeMsId(self): + return self.fakeMsId + + def getFakeMsRunId(self): + return 
self.fakeMsId * 1000 + + def getHostHaConfigCmd(self, provider='kvmhaprovider'): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.provider = provider + cmd.hostid = self.host.id + return cmd + + def getHostHaEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.host.id + return cmd + + def getHost(self, hostId=None): + response = list_hosts( + self.apiClient, + zoneid=self.zone.id, + type='Routing', + id=hostId + ) + if len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No hosts found, skipping out-of-band management test") + + def getIpmiServerIp(self): + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"])) + return s.getsockname()[0] + + def getIpmiServerPort(self): + return self.serverPort + + def getOobmConfigCmd(self): + cmd = configureOutOfBandManagement.configureOutOfBandManagementCmd() + cmd.driver = 'ipmitool' # The default available driver + cmd.address = self.getIpmiServerIp() + cmd.port = self.getIpmiServerPort() + cmd.username = 'admin' + cmd.password = 'password' + cmd.hostid = self.host.id + return cmd + + def getOobmEnableCmd(self): + cmd = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd() + cmd.hostid = self.host.id + return cmd + + def getOobmDisableCmd(self): + cmd = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd() + cmd.hostid = self.host.id + return cmd + + def getOobmIssueActionCmd(self): + cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd() + cmd.hostid = self.host.id + cmd.action = 'STATUS' + return cmd + + def issue_power_action_cmd(self, action, timeout=None): + cmd = self.getOobmIssueActionCmd() + cmd.action = action + if timeout: + cmd.timeout = timeout + + try: + return self.apiClient.issueOutOfBandManagementPowerAction(cmd) + except Exception as e: + if "packet session id 0x0 does not match active session" in str(e): + raise self.skipTest("Known ipmitool issue hit, skipping test") + raise e + + def configure_and_enable_oobm(self): + self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd()) + response = self.apiClient.enableOutOfBandManagementForHost(self.getOobmEnableCmd()) + self.assertEqual(response.enabled, True) + + def start_ipmi_server(self): + def startIpmiServer(tname, server): + self.debug("Starting ipmisim server") + try: + server.serve_forever() + except Exception: pass + IpmiServerContext('reset') + ThreadedIpmiServer.allow_reuse_address = False + server = ThreadedIpmiServer(('0.0.0.0', self.getIpmiServerPort()), IpmiServer) + thread.start_new_thread(startIpmiServer, ("ipmi-server", server,)) + self.server = server + + def checkSyncToState(self, state, interval): + def checkForStateSync(expectedState): + response = self.getHost(hostId=self.host.id).outofbandmanagement + return response.powerstate == expectedState, None + + sync_interval = 1 + int(interval)/1000 + res, _ = wait_until(sync_interval, 10, checkForStateSync, state) + if not res: + self.fail("Failed to get host.powerstate synced to expected state:" + state) + response = self.getHost(hostId=self.host.id).outofbandmanagement + self.assertEqual(response.powerstate, state) + + def get_host_in_available_state(self): + + self.configure_and_start_ipmi_server() + self.assert_issue_command_state('ON', 'On') + self.configureAndEnableHostHa() + + self.check_host_transition_to_available() + + response = self.getHost() + if response.hostha.hastate is not "Available": + 
print response + + self.assertEqual(response.hostha.hastate, "Available") + + def configureAndEnableHostHa(self): + self.apiClient.configureHAForHost(self.getHostHaConfigCmd()) + + response = self.apiClient.enableHAForHost(self.getHostHaEnableCmd()) + self.assertEqual(response.haenable, True) + + def configure_and_start_ipmi_server(self, power_state=None): + """ + Setup ipmisim and enable out-of-band management for host + """ + self.configure_and_enable_oobm() + self.start_ipmi_server() + if power_state: + bmc = IpmiServerContext().bmc + bmc.powerstate = power_state + + def assert_issue_command_state(self, command, expected): + """ + Asserts power action result for a given power command + """ + if command != 'STATUS': + self.issue_power_action_cmd(command) + response = self.issue_power_action_cmd('STATUS') + self.assertEqual(response.powerstate, expected) + + def kill_agent(self): + t_end = time.time() + 90 + while time.time() < t_end: + try: + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')") + return + except Exception: + print("Cannot ssh into: " + self.host.ipaddress) + self.fail(self) + + def set_host_to_alert(self): + self.dbclient.execute("update host set host.status = 'Alert' where host.uuid = '%s'" % self.host.id) + + def check_host_transitioned_to_degraded(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Degraded": + return + else: + continue + self.fail(self) + + def wait_util_host_is_fencing(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Fencing": + return + else: + continue + self.fail(self) + + def check_host_transitioned_to_suspect(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Suspect": + return + else: + continue + self.fail(self) + + def check_host_transitioned_to_checking(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Checking": + return + else: + continue + self.fail(self) + + def wait_util_host_is_fenced(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Fenced": + return + else: + continue + self.fail(self) + + def wait_util_host_is_up(self): + t_end = time.time() + 120 + while time.time() < t_end: + host = self.getHost() + if host.state in "Up": + return + else: + continue + self.fail(self) + + def stop_agent(self): + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], passwd=self.hostConfig["password"]).execute\ + ("service cloudstack-agent stop") + + def start_agent(self): + self.ssh_and_restart_agent() + self.check_host_transition_to_available() + + def ssh_and_restart_agent(self): + t_end = time.time() + 90 + while time.time() < t_end: + try: + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("service cloudstack-agent restart") + return + except Exception: + print("Cannot ssh into: " + self.host.ipaddress) + self.fail(self) + + def check_host_transition_to_available(self): + t_end = time.time() + 90 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate == "Available": + return + else: + continue + self.fail(self) + + def wait_util_host_is_recovered(self): + t_end = 
time.time() + 180 + while time.time() < t_end: + host = self.getHost() + if host.hostha.hastate in "Recovered": + return + else: + continue + self.fail(self) + + def reset_host(self): + SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"], + passwd=self.hostConfig["password"]).execute \ + ("reboot") + + def deploy_vm(self): + vm = VirtualMachine.create( + self.apiClient, + services=self.services["virtual_machine"], + serviceofferingid=self.service_offering.id, + templateid=self.template.id, + zoneid=self.zone.id, + hostid = self.host.id, + method="POST" + ) + + self.cleanup.append(vm) + + def update_configuration(self, name, value): + update_configuration_cmd = updateConfiguration.updateConfigurationCmd() + update_configuration_cmd.name = name + update_configuration_cmd.value = value + update_configuration_cmd.clusterid = self.cluster_id + + self.apiClient.updateConfiguration(update_configuration_cmd) + + + @attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_stop_agent_host_is_degraded(self): + """ + Tests HA state turns Degraded when agent is stopped + """ + self.deploy_vm() + + # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available + self.get_host_in_available_state() + + # SSH into the KVM Host and executes kill -9 of the agent + self.stop_agent() + + # Checks if the host would turn into Degraded in the next 120 seconds + try: + self.check_host_transitioned_to_degraded() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Enable Host + self.start_agent() + + #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_recovering_start_agent_host_is_available(self): + """ + Tests HA state turns Recovered when agent is stopped and host is reset + """ + # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available + # Then kills the agent and wait untill the state is Degraded + + self.deploy_vm() + # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available + self.get_host_in_available_state() + + # SSH into the KVM Host and executes kill -9 of the agent + self.kill_agent() + + # Checks if the host would turn into Degraded in the next 120 seconds + try: + self.check_host_transitioned_to_degraded() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Reset host so a shut down could be emulated. During the bootup host should transition into recovered state + self.reset_host() + + # Waits until Degraded host turns into Recovered for 180 seconds, + # if it fails it tries to revert host back to Available + try: + self.wait_util_host_is_recovered() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # SSH into the KVM Host and executes service cloudstack-agent restart of the agent + self.start_agent() + + #@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_fencing_host(self): + """ + Tests HA state turns Recovered when agent is stopped and host is reset, + then configure incorrect OOBM configuration, so that Recover command would fail + and host would transition into Fenced state. 
+ """ + self.deploy_vm() + + # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available + self.get_host_in_available_state() + + # SSH into the KVM Host and executes kill -9 of the agent + self.kill_agent() + + # Checks if the host would turn into Degraded in the next 120 seconds + try: + self.check_host_transitioned_to_degraded() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Change OOBM Configuration to invalid so it would fail the recover operations. + cmd = self.getOobmConfigCmd() + cmd.address = "1.1.1.1" + self.apiClient.configureOutOfBandManagement(cmd) + + # Reset host so a shut down could be emulated. During the bootup host should transition into recovered state + self.reset_host() + self.kill_agent() + + # Waits until Recovering host turns into Fencing for 180 seconds, + # if it fails it tries to revert host back to Up + try: + self.wait_util_host_is_fencing() + except Exception as e: + self.ssh_and_restart_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Configure correct OOBM configuration so that the Fencing operation would succeed + self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd()) + + # Waits until Fencing host turns into Fenced for 180 seconds, + # if it fails it tries to revert host back to Up + try: + self.wait_util_host_is_fenced() + except Exception as e: + self.ssh_and_restart_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # SSH into the KVM Host and executes service cloudstack-agent restart of the agent + self.ssh_and_restart_agent() + + # Waits until state is Up so that cleanup would be successful + self.wait_util_host_is_up() + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true") + def test_ha_kill_agent_host_is_degraded(self): + """ + Tests HA state turns Suspect/Checking when some activity/health checks fail + Configures HA, Logs into to a host and restarts the service + Then it confirms the ha state jumps through Suspect -> Checking -> Available + """ + # Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available + self.get_host_in_available_state() + + # SSH into the KVM Host and executes kill -9 of the agent + self.ssh_and_restart_agent() + + # Checks if the host would turn into Suspect in the next 120 seconds + try: + self.check_host_transitioned_to_suspect() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Checks if the host would turn into Degraded in the next 120 seconds + try: + self.check_host_transitioned_to_checking() + except Exception as e: + self.start_agent() + raise Exception("Warning: Exception during test execution : %s" % e) + + # Enable Host + self.check_host_transition_to_available() diff --git a/test/integration/smoke/test_hostha_simulator.py b/test/integration/smoke/test_hostha_simulator.py new file mode 100644 index 00000000000..82163b33fd5 --- /dev/null +++ b/test/integration/smoke/test_hostha_simulator.py @@ -0,0 +1,656 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +import marvin +from marvin.cloudstackTestCase import * +from marvin.cloudstackAPI import * +from marvin.lib.utils import * +from marvin.lib.base import * +from marvin.lib.common import * +from nose.plugins.attrib import attr + +import random + +from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer + +import random +import socket +import sys +import thread +import time + + +class TestHostHA(cloudstackTestCase): + """ Test cases for host HA using Simulator host(s) + """ + + def setUp(self): + self.apiclient = self.testClient.getApiClient() + self.hypervisor = self.testClient.getHypervisorInfo() + self.dbclient = self.testClient.getDbConnection() + self.services = self.testClient.getParsedTestDataConfig() + self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__ + self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20) + self.host = None + + # Cleanup any existing configs + self.dbclient.execute("delete from ha_config where resource_type='Host'") + + # use random port for ipmisim + s = socket.socket() + s.bind(('', 0)) + self.serverPort = s.getsockname()[1] + s.close() + + # Get a host to run tests against + self.host = self.getHost() + + self.cleanup = [] + + def tearDown(self): + try: + self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId()) + self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'") + self.dbclient.execute("delete from ha_config where resource_type='Host'") + self.dbclient.execute("update host set resource_state='Enabled' where type='Routing' and resource_state='Maintenance'") + cleanup_resources(self.apiclient, self.cleanup) + except Exception as e: + raise Exception("Warning: Exception during cleanup : %s" % e) + + def getFakeMsId(self): + return self.fakeMsId + + def getFakeMsRunId(self): + return self.fakeMsId * 1000 + + def getHost(self, hostId=None): + if self.host and hostId is None: + return self.host + + response = list_hosts( + self.apiclient, + type='Routing', + hypervisor='Simulator', + resourcestate='Enabled', + id=hostId + ) + if response and len(response) > 0: + self.host = response[0] + return self.host + raise self.skipTest("No simulator hosts found, skipping host-ha test") + + def getHostHaConfigCmd(self, provider='simulatorhaprovider'): + cmd = configureHAForHost.configureHAForHostCmd() + cmd.provider = provider + cmd.hostid = self.getHost().id + return cmd + + def getHostHaEnableCmd(self): + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def getHostHaDisableCmd(self): + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = self.getHost().id + return cmd + + def configureAndEnableHostHa(self, initialize=True): + 
self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + response = self.apiclient.enableHAForHost(self.getHostHaEnableCmd()) + self.assertEqual(response.haenable, True) + if initialize: + self.configureSimulatorHAProviderState(True, True, True, False) + + def configureAndDisableHostHa(self, hostId): + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + cmd.hostid = hostId + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + def enableHostHa(self, hostId): + cmd = self.getHostHaEnableCmd() + cmd.hostid = hostId + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + def configureSimulatorHAProviderState(self, health, activity, recover, fence): + cmd = configureSimulatorHAProviderState.configureSimulatorHAProviderStateCmd() + cmd.hostid = self.getHost().id + cmd.health = health + cmd.activity = activity + cmd.recover = recover + cmd.fence = fence + response = self.apiclient.configureSimulatorHAProviderState(cmd) + self.assertEqual(response.success, 'true') + + def getSimulatorHAStateTransitions(self, hostId): + cmd = listSimulatorHAStateTransitions.listSimulatorHAStateTransitionsCmd() + cmd.hostid = hostId + return self.apiclient.listSimulatorHAStateTransitions(cmd) + + def checkSyncToState(self, state, interval=5000): + def checkForStateSync(expectedState): + response = self.getHost(hostId=self.getHost().id).hostha + return response.hastate == expectedState, None + + sync_interval = 1 + int(interval) / 1000 + res, _ = wait_until(sync_interval, 50, checkForStateSync, state) + if not res: + self.fail("Failed to get host.hastate synced to expected state:" + state) + response = self.getHost(hostId=self.getHost().id).hostha + self.assertEqual(response.hastate, state) + + def get_non_configured_ha_host(self): + response = list_hosts( + self.apiclient, + type='Routing' + ) + for host in response: + if host.haprovider is None: + return host + else: + return None + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_enable_feature_without_setting_provider(self): + """ + Tests Enable HA without setting the provider, Exception is thrown + """ + host = self.get_non_configured_ha_host() + + if host is None: + cloudstackTestCase.skipTest(self, "There is no non configured hosts. 
Skipping test.") + + cmd = self.getHostHaEnableCmd() + cmd.hostid = host.id + + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_list_providers(self): + """ + Tests default ha providers list + """ + cmd = listHostHAProviders.listHostHAProvidersCmd() + + cmd.hypervisor = 'Simulator' + response = self.apiclient.listHostHAProviders(cmd)[0] + self.assertEqual(response.haprovider, 'SimulatorHAProvider') + + cmd.hypervisor = 'KVM' + response = self.apiclient.listHostHAProviders(cmd)[0] + self.assertEqual(response.haprovider, 'KVMHAProvider') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_configure_invalid_provider(self): + """ + Tests host-ha configuration with invalid driver + """ + cmd = self.getHostHaConfigCmd() + cmd.provider = 'randomDriverThatDoesNotExist' + try: + response = self.apiclient.configureHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_configure_default_driver(self): + """ + Tests host-ha configuration with valid data + """ + cmd = self.getHostHaConfigCmd() + response = self.apiclient.configureHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haprovider, cmd.provider.lower()) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_enable_feature_invalid(self): + """ + Tests ha feature enable command with invalid options + """ + cmd = self.getHostHaEnableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.enableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + try: + cmd = enableHAForCluster.enableHAForClusterCmd() + response = self.apiclient.enableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + try: + cmd = enableHAForZone.enableHAForZoneCmd() + response = self.apiclient.enableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_disable_feature_invalid(self): + """ + Tests ha feature disable command with invalid options + """ + cmd = self.getHostHaDisableCmd() + cmd.hostid = -1 + try: + response = self.apiclient.disableHAForHost(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + try: + cmd = disableHAForCluster.disableHAForClusterCmd() + response = self.apiclient.disableHAForCluster(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + try: + cmd = disableHAForZone.disableHAForZoneCmd() + response = self.apiclient.disableHAForZone(cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_enable_feature_valid(self): + """ + Tests host-ha enable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + 
cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_hostha_disable_feature_valid(self): + """ + Tests host-ha disable feature with valid options + """ + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaDisableCmd() + response = self.apiclient.disableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, False) + + response = self.getHost(hostId=cmd.hostid).hostha + self.assertEqual(response.hastate, 'Disabled') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_enabledisable_across_clusterzones(self): + """ + Tests ha enable/disable feature at cluster and zone level + Zone > Cluster > Host + """ + self.configureAndEnableHostHa() + + host = self.getHost() + self.checkSyncToState('Available') + response = self.getHost(hostId=host.id).hostha + self.assertTrue(response.hastate == 'Available') + + # Disable at host level + cmd = disableHAForHost.disableHAForHostCmd() + cmd.hostid = host.id + response = self.apiclient.disableHAForHost(cmd) + + # Disable at cluster level + cmd = disableHAForCluster.disableHAForClusterCmd() + cmd.clusterid = host.clusterid + response = self.apiclient.disableHAForCluster(cmd) + + # Disable at zone level + cmd = disableHAForZone.disableHAForZoneCmd() + cmd.zoneid = host.zoneid + response = self.apiclient.disableHAForZone(cmd) + + # HA state check + response = self.getHost(hostId=host.id).hostha + self.assertTrue(response.hastate == 'Disabled') + + # Check ha-state check and sync + self.dbclient.execute("update ha_config set ha_state='Available' where enabled='1' and resource_type='Host'") + self.checkSyncToState('Disabled') + + # Enable at zone level + cmd = enableHAForZone.enableHAForZoneCmd() + cmd.zoneid = host.zoneid + response = self.apiclient.enableHAForZone(cmd) + + # Enable at cluster level + cmd = enableHAForCluster.enableHAForClusterCmd() + cmd.clusterid = host.clusterid + response = self.apiclient.enableHAForCluster(cmd) + + # Enable at host level + cmd = enableHAForHost.enableHAForHostCmd() + cmd.hostid = host.id + response = self.apiclient.enableHAForHost(cmd) + + # Check state sync + self.checkSyncToState('Available') + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_multiple_mgmt_server_ownership(self): + """ + Tests ha resource ownership expiry across multi-mgmt server + """ + self.configureAndEnableHostHa() + + cloudstackVersion = Configurations.listCapabilities(self.apiclient).cloudstackversion + + currentMsHosts = [] + mshosts = self.dbclient.execute( + "select msid from mshost where version='%s' and removed is NULL and state='Up'" % (cloudstackVersion)) + if len(mshosts) > 0: + currentMsHosts = map(lambda row: row[0], mshosts) + + # Inject fake ms host + self.dbclient.execute( + "insert into mshost (msid,runid,name,state,version,service_ip,service_port,last_update) values (%s,%s,'ha-marvin-fakebox', 'Down', '%s', '127.0.0.1', '22', NOW())" % ( + self.getFakeMsId(), self.getFakeMsRunId(), cloudstackVersion)) + + # Pass ownership to the fake ms id + self.dbclient.execute( + "update ha_config set mgmt_server_id=%d where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'" % 
self.getFakeMsId()) + + pingInterval = float(list_configurations( + self.apiclient, + name='ping.interval' + )[0].value) + + pingTimeout = float(list_configurations( + self.apiclient, + name='ping.timeout' + )[0].value) + + def removeFakeMgmtServer(fakeMsRunId): + rows = self.dbclient.execute("select * from mshost_peer where peer_runid=%s" % fakeMsRunId) + if len(rows) > 0: + self.debug("Mgmt server is now trying to contact the fake mgmt server") + self.dbclient.execute("update mshost set removed=now() where runid=%s" % fakeMsRunId) + self.dbclient.execute("update mshost_peer set peer_state='Down' where peer_runid=%s" % fakeMsRunId) + return True, None + return False, None + + def checkHaOwnershipExpiry(fakeMsId): + rows = self.dbclient.execute( + "select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'") + if len(rows) > 0 and rows[0][0] != fakeMsId: + self.debug("HA resource ownership expired as node was detected to be gone") + return True, None + return False, None + + retry_interval = 1 + (pingInterval * pingTimeout / 10) + + res, _ = wait_until(retry_interval, 20, removeFakeMgmtServer, self.getFakeMsRunId()) + if not res: + self.fail("Management server failed to turn down or remove fake mgmt server") + + res, _ = wait_until(retry_interval, 100, checkHaOwnershipExpiry, self.getFakeMsId()) + if not res: + self.fail("Management server failed to expire ownership of fenced peer") + + self.debug("Testing ha background sync should claim new ownership") + self.checkSyncToState('Available') + + result = self.dbclient.execute( + "select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'") + newOwnerId = result[0][0] + self.assertTrue(newOwnerId in currentMsHosts) + + def checkFSMTransition(self, transition, event, haState, prevHaState, hasActiviyCounter, hasRecoveryCounter): + self.assertEqual(transition.event, event) + self.assertEqual(transition.hastate, haState) + self.assertEqual(transition.prevhastate, prevHaState) + if hasActiviyCounter: + self.assertTrue(transition.activitycounter > 0) + else: + self.assertEqual(transition.activitycounter, 0) + if hasRecoveryCounter: + self.assertTrue(transition.recoverycounter > 0) + else: + self.assertEqual(transition.recoverycounter, 0) + + def findFSMTransitionToState(self, state, host): + transitions = self.getSimulatorHAStateTransitions(host.id) + if not transitions: + return False, (None, None, None) + previousTransition = None + stateTransition = None + nextTransition = None + for transition in transitions: + if stateTransition: + nextTransition = transition + break + if transition.hastate == state: + stateTransition = transition + if not stateTransition: + previousTransition = transition + if stateTransition: + return True, (previousTransition, stateTransition, nextTransition,) + return False, (previousTransition, stateTransition, nextTransition,) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_available(self): + """ + Tests ha FSM transitions for valid healthy host + Simulates health check passing + """ + + host = self.getHost() + self.configureAndDisableHostHa(host.id) + self.configureSimulatorHAProviderState(True, True, True, False) + self.configureAndEnableHostHa(False) + + res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'available', host) + if not res: + self.fail("FSM did not transition to available state") + + 
self.checkFSMTransition(T, 'enabled', 'available', 'disabled', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_degraded(self): + """ + Tests ha FSM transitions leading to degraded state + Simulates health check failures with activity checks passing + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Degraded->Available + """ + host = self.getHost() + self.configureSimulatorHAProviderState(False, True, True, False) + self.configureAndEnableHostHa(False) + + # Initial health check failure + res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', host) + if not res: + self.fail("FSM did not transition to suspect state") + + self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False) + + # Check transition to Degraded + res, (prevT, T, nextT) = wait_until(2, 50, self.findFSMTransitionToState, 'degraded', host) + if not res: + self.fail("FSM did not transition to degraded state") + + if prevT: + self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False) + self.checkFSMTransition(T, 'activitycheckfailureunderthresholdratio', 'degraded', 'checking', True, False) + if nextT: + self.checkFSMTransition(nextT, 'periodicrecheckresourceactivity', 'suspect', 'degraded', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_recovering(self): + """ + Tests ha FSM transitions leading to recovering + Simulates both health and activity check failures + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Recovering->Recovered<-retry-loop->->Fencing + """ + host = self.getHost() + self.configureSimulatorHAProviderState(False, False, True, False) + self.configureAndEnableHostHa(False) + + # Initial health check failure + res, (_, T, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', host) + if not res: + self.fail("FSM did not transition to suspect state") + + self.checkFSMTransition(T, 'healthcheckfailed', 'suspect', 'available', False, False) + + # Check transition to recovering + res, (prevT, T, nextT) = wait_until(2, 100, self.findFSMTransitionToState, 'recovering', host) + if not res: + self.fail("FSM did not transition to recovering state") + + if prevT: + self.checkFSMTransition(prevT, 'performactivitycheck', 'checking', 'suspect', True, False) + self.checkFSMTransition(T, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, False) + if nextT: + self.checkFSMTransition(nextT, 'recovered', 'recovered', 'recovering', False, True) + + # Check transition to fencing due to recovery attempts exceeded + res, (prevT, T, nextT) = wait_until(2, 100, self.findFSMTransitionToState, 'fencing', host) + if not res: + self.fail("FSM did not transition to fencing state") + + if prevT: + self.checkFSMTransition(prevT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, + True) + self.checkFSMTransition(T, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_ha_verify_fsm_fenced(self): + """ + Tests ha FSM transitions for failures leading to fenced state + FSM transitions should happen indefinitely between: + Available->Suspect<->Checking->Recovering<-fail recovery->->Fencing->Fenced + """ + host = 
self.getHost() + self.configureAndDisableHostHa(host.id) + self.configureSimulatorHAProviderState(False, False, False, True) + self.configureAndEnableHostHa(False) + + # Check for transition to fenced + res, (prevT, T, _) = wait_until(2, 100, self.findFSMTransitionToState, 'fenced', host) + if not res: + self.fail("FSM did not transition to fenced state") + + self.checkFSMTransition(prevT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True) + self.checkFSMTransition(T, 'fenced', 'fenced', 'fencing', False, False) + + # TODO: add test case for HA vm reboot checks + + # Simulate manual recovery of host and cancel maintenance mode + self.configureSimulatorHAProviderState(True, True, True, False) + cancelCmd = cancelHostMaintenance.cancelHostMaintenanceCmd() + cancelCmd.id = host.id + self.apiclient.cancelHostMaintenance(cancelCmd) + + # Check for transition to available after manual recovery + res, (prevT, T, _) = wait_until(2, 100, self.findFSMTransitionToState, 'available', host) + if not res: + self.fail("FSM did not transition to available state") + + self.checkFSMTransition(prevT, 'healthcheckpassed', 'ineligible', 'fenced', False, False) + self.checkFSMTransition(T, 'eligible', 'available', 'ineligible', False, False) + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_configure_ha_provider_invalid(self): + """ + Tests configure HA Provider with invalid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + # Setup wrong configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with not supported provider for HA + try: + self.apiclient.configureHAForHost(conf_ha_cmd) + except Exception: + pass + else: + self.fail("Expected an exception to be thrown, failing") + + @attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false") + def test_configure_ha_provider_valid(self): + """ + Tests configure HA Provider with valid provider options + """ + + # Enable ha for host + self.apiclient.configureHAForHost(self.getHostHaConfigCmd()) + cmd = self.getHostHaEnableCmd() + response = self.apiclient.enableHAForHost(cmd) + self.assertEqual(response.hostid, cmd.hostid) + self.assertEqual(response.haenable, True) + + host = self.getHost(response.hostid) + + + # Setup correct configuration for the host + conf_ha_cmd = configureHAForHost.configureHAForHostCmd() + if host.hypervisor.lower() in "kvm": + conf_ha_cmd.provider = "kvmhaprovider" + if host.hypervisor.lower() in "simulator": + conf_ha_cmd.provider = "simulatorhaprovider" + + conf_ha_cmd.hostid = cmd.hostid + + # Call the configure HA provider API with a supported provider for HA + response = self.apiclient.configureHAForHost(conf_ha_cmd) + + # Check the response contains the set provider and hostID + self.assertEqual(response.haprovider, conf_ha_cmd.provider) + self.assertEqual(response.hostid, conf_ha_cmd.hostid) diff --git a/ui/css/cloudstack3.css b/ui/css/cloudstack3.css 
index d2087b542bf..987f35d88a2 100644 --- a/ui/css/cloudstack3.css +++ b/ui/css/cloudstack3.css @@ -12756,6 +12756,38 @@ div.ui-dialog div.autoscaler div.field-group div.form-container form div.form-it background-position: -137px -614px; } +.blankHAForHost .icon { + background-position: -266px -31px; +} + +.blankHAForHost:hover .icon { + background-position: -266px -31px; +} + +.configureHAForHost .icon { + background-position: -270px -148px; +} + +.configureHAForHost:hover .icon { + background-position: -270px -728px; +} + +.enableHA .icon { + background-position: -265px -93px; +} + +.enableHA:hover .icon { + background-position: -265px -673px; +} + +.disableHA .icon { + background-position: -265px -120px; +} + +.disableHA:hover .icon { + background-position: -265px -700px; +} + .blankOutOfBandManagement .icon { background-position: -266px -31px; } diff --git a/ui/l10n/en.js b/ui/l10n/en.js index 8cfa419e9b2..39e70727ba1 100644 --- a/ui/l10n/en.js +++ b/ui/l10n/en.js @@ -808,6 +808,12 @@ var dictionary = {"ICMP.code":"ICMP Code", "label.guest.traffic.vswitch.type":"Guest Traffic vSwitch Type", "label.guest.type":"Guest Type", "label.ha.enabled":"HA Enabled", +"label.ha.configure":"Configure HA", +"label.ha.disable":"Disable HA", +"label.ha.enable":"Enable HA", +"label.ha.provider":"HA Provider", +"label.ha.state":"HA State", +"label.ha":"HA", "label.health.check":"Health Check", "label.health.check.advanced.options":"Advanced Options:", "label.health.check.configurations.options":"Configuration Options:", diff --git a/ui/scripts/system.js b/ui/scripts/system.js index e9d0b5c011f..a0bcb3af61d 100755 --- a/ui/scripts/system.js +++ b/ui/scripts/system.js @@ -8144,6 +8144,80 @@ notification: { poll: pollAsyncJobResult } + }, + enableHA: { + label: 'label.ha.enable', + action: function (args) { + var data = { + zoneid: args.context.physicalResources[0].id + }; + $.ajax({ + url: createURL("enableHAForZone"), + data: data, + success: function (json) { + var jid = json.enablehaforzoneresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return zoneActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + zoneid: args.context.physicalResources[0].id + }; + $.ajax({ + url: createURL("disableHAForZone"), + data: data, + success: function (json) { + var jid = json.disablehaforzoneresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return zoneActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } } }, tabs: { @@ -15004,8 +15078,81 @@ notification: { poll: pollAsyncJobResult } + }, + enableHA: { + label: 'label.ha.enable', + action: function (args) { + var data = { + clusterid: args.context.clusters[0].id + }; + $.ajax({ + url: createURL("enableHAForCluster"), + data: data, + success: function (json) { + var jid = json.enablehaforclusterresponse.jobid; + args.response.success({ + 
_custom: { + jobId: jid, + getActionFilter: function () { + return clusterActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + clusterid: args.context.clusters[0].id + }; + $.ajax({ + url: createURL("disableHAForCluster"), + data: data, + success: function (json) { + var jid = json.disablehaforclusterresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return clusterActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } } - }, tabs: { @@ -16411,6 +16558,168 @@ } }, + blankHAForHost: { + label: '', + action: function (args) { + } + }, + + configureHAForHost: { + label: 'label.ha.configure', + messages: { + confirm: function (args) { + return 'label.ha.configure'; + }, + notification: function (args) { + return 'label.ha.configure'; + } + }, + createForm: { + title: 'label.ha.configure', + fields: { + provider: { + label: 'label.ha.provider', + validation: { + required: true + }, + select: function (args) { + $.ajax({ + url: createURL('listHostHAProviders'), + data: {'hypervisor': args.context.hosts[0].hypervisor}, + dataType: 'json', + success: function (json) { + var response = json.listhosthaprovidersresponse; + var items = []; + items.push({ + id: '', + description: _l('') + }); + if (response.haprovider) { + $.each(response.haprovider, function (idx, item) { + items.push({ + id: item.haprovider, + description: item.haprovider + }); + }); + } + args.response.success({ + data: items + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + } + } + } + }, + action: function (args) { + var data = args.data; + data.hostid = args.context.hosts[0].id; + $.ajax({ + url: createURL('configureHAForHost'), + data: data, + dataType: 'json', + success: function (json) { + var jid = json.configurehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + }); + }, + notification: { + poll: pollAsyncJobResult + } + }, + + enableHA: { + label: 'label.ha.enable', + action: function (args) { + var data = { + hostid: args.context.hosts[0].id, + }; + $.ajax({ + url: createURL("enableHAForHost"), + data: data, + success: function (json) { + var jid = json.enablehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.enable'; + }, + notification: function (args) { + return 'label.ha.enable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + + disableHA: { + label: 'label.ha.disable', + action: function (args) { + var data = { + hostid: 
args.context.hosts[0].id, + }; + $.ajax({ + url: createURL("disableHAForHost"), + data: data, + success: function (json) { + var jid = json.disablehaforhostresponse.jobid; + args.response.success({ + _custom: { + jobId: jid, + getActionFilter: function () { + return hostActionfilter; + } + } + }); + }, + error: function (json) { + args.response.error(parseXMLHttpResponse(json)); + } + + }); + }, + messages: { + confirm: function (args) { + return 'label.ha.disable'; + }, + notification: function (args) { + return 'label.ha.disable'; + } + }, + notification: { + poll: pollAsyncJobResult + } + }, + blankOutOfBandManagement: { label: '', action: function (args) { @@ -16745,6 +17054,9 @@ if (host.outofbandmanagement == null || !host.outofbandmanagement.enabled) { hiddenTabs.push("outofbandmanagement"); } + if (host.hostha == null || (host.hypervisor != 'KVM' && host.hypervisor != 'Simulator')) { + hiddenTabs.push("ha"); + } return hiddenTabs; }, tabs: { @@ -16793,6 +17105,12 @@ hypervisorversion: { label: 'label.hypervisor.version' }, + hastate: { + label: 'label.ha.state' + }, + haprovider: { + label: 'label.ha.provider' + }, hosttags: { label: 'label.host.tags', isEditable: true, @@ -16898,6 +17216,11 @@ if (item && item.outofbandmanagement) { item.powerstate = item.outofbandmanagement.powerstate; } + if (item && item.hostha) { + item.hastate = item.hostha.hastate; + item.haprovider = item.hostha.haprovider; + item.haenabled = item.hostha.haenable; + } $.ajax({ url: createURL("listDedicatedHosts&hostid=" + args.context.hosts[0].id), @@ -16930,6 +17253,39 @@ } }, + ha: { + title: 'label.ha', + fields: { + haenable: { + label: 'label.ha.enabled', + converter: cloudStack.converters.toBooleanText + }, + hastate: { + label: 'label.ha.state' + }, + haprovider: { + label: 'label.ha.provider' + }, + }, + dataProvider: function (args) { + $.ajax({ + url: createURL("listHosts&id=" + args.context.hosts[0].id), + dataType: "json", + async: true, + success: function (json) { + var host = json.listhostsresponse.host[0]; + var hostha = {}; + if (host && host.hostha) { + hostha = host.hostha; + } + args.response.success({ + data: hostha + }); + } + }); + } + }, + outofbandmanagement: { title: 'label.outofbandmanagement', fields: { @@ -21218,6 +21574,12 @@ allowedActions.push("disableOutOfBandManagement"); } + if (jsonObj.hasOwnProperty('resourcedetails') && jsonObj['resourcedetails'].hasOwnProperty('resourceHAEnabled') && jsonObj['resourcedetails']['resourceHAEnabled'] == 'false') { + allowedActions.push("enableHA"); + } else { + allowedActions.push("disableHA"); + } + return allowedActions; } @@ -21309,6 +21671,12 @@ allowedActions.push("disableOutOfBandManagement"); } + if (jsonObj.hasOwnProperty('resourcedetails') && jsonObj['resourcedetails'].hasOwnProperty('resourceHAEnabled') && jsonObj['resourcedetails']['resourceHAEnabled'] == 'false') { + allowedActions.push("enableHA"); + } else { + allowedActions.push("disableHA"); + } + return allowedActions; } @@ -21345,6 +21713,14 @@ allowedActions.push("remove"); } + allowedActions.push("blankHAForHost"); + allowedActions.push("configureHAForHost"); + if (jsonObj.hasOwnProperty("hostha") && jsonObj.hostha.haenable) { + allowedActions.push("disableHA"); + } else { + allowedActions.push("enableHA"); + } + allowedActions.push("blankOutOfBandManagement"); allowedActions.push("configureOutOfBandManagement"); if (jsonObj.hasOwnProperty("outofbandmanagement") && jsonObj.outofbandmanagement.enabled) {