CLOUDSTACK-9782: Host HA and KVM HA provider

Host-HA offers investigation, fencing and recovery mechanisms for hosts that,
for any reason, are malfunctioning. It uses activity and health checks to
determine the current host state, based on which it may degrade a host or try to
recover it. On failing to recover it, it may try to fence the host.

The core feature is implemented in a hypervisor-agnostic way, with two separate
implementations of the driver/provider for the Simulator and KVM hypervisors.
The framework also allows other hypervisor-specific providers to be implemented
in the future.

The Host-HA provider implementation for KVM hypervisor uses the out-of-band
management sub-system to issue IPMI calls to reset (recover) or poweroff (fence)
a host.

The Host-HA provider implementation for Simulator provides a means of testing
and validating the core framework implementation.

Signed-off-by: Abhinandan Prateek <abhinandan.prateek@shapeblue.com>
Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Rohit Yadav 2017-02-15 17:36:18 +05:30 committed by Rohit Yadav
parent 6e832b55dc
commit 212e5ccfa7
102 changed files with 8194 additions and 334 deletions

View File

@ -44,6 +44,10 @@ env:
smoke/test_dynamicroles
smoke/test_global_settings
smoke/test_guest_vlan_range
smoke/test_ha_for_host
smoke/test_ha_kvm_agent
smoke/test_ha_kvm
smoke/test_hostha_simulator
smoke/test_hosts
smoke/test_internal_lb
smoke/test_iso
@ -52,6 +56,7 @@ env:
smoke/test_login
smoke/test_metrics_api
smoke/test_multipleips_per_nic
smoke/test_nested_virtualization
smoke/test_network
smoke/test_network_acl
smoke/test_nic

View File

@ -18,15 +18,14 @@ package com.cloud.dc;
import com.cloud.org.Grouping;
import org.apache.cloudstack.acl.InfrastructureEntity;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.api.InternalIdentity;
import org.apache.cloudstack.kernel.Partition;
import java.util.Map;
/**
*
*/
public interface DataCenter extends InfrastructureEntity, Grouping, Identity, InternalIdentity {
public interface DataCenter extends InfrastructureEntity, Grouping, Partition {
public enum NetworkType {
Basic, Advanced,

View File

@ -22,6 +22,7 @@ import java.util.Map;
import org.apache.cloudstack.acl.Role;
import org.apache.cloudstack.acl.RolePermission;
import org.apache.cloudstack.config.Configuration;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.usage.Usage;
import com.cloud.dc.DataCenter;
@ -323,6 +324,12 @@ public class EventTypes {
public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD = "HOST.OOBM.CHANGEPASSWORD";
public static final String EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION = "HOST.OOBM.POWERSTATE.TRANSITION";
// HA
public static final String EVENT_HA_RESOURCE_ENABLE = "HA.RESOURCE.ENABLE";
public static final String EVENT_HA_RESOURCE_DISABLE = "HA.RESOURCE.DISABLE";
public static final String EVENT_HA_RESOURCE_CONFIGURE = "HA.RESOURCE.CONFIGURE";
public static final String EVENT_HA_STATE_TRANSITION = "HA.STATE.TRANSITION";
// Maintenance
public static final String EVENT_MAINTENANCE_CANCEL = "MAINT.CANCEL";
public static final String EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE = "MAINT.CANCEL.PS";
@ -779,6 +786,12 @@ public class EventTypes {
entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_CHANGE_PASSWORD, Host.class);
entityEventDetails.put(EVENT_HOST_OUTOFBAND_MANAGEMENT_POWERSTATE_TRANSITION, Host.class);
// HA
entityEventDetails.put(EVENT_HA_RESOURCE_ENABLE, HAConfig.class);
entityEventDetails.put(EVENT_HA_RESOURCE_DISABLE, HAConfig.class);
entityEventDetails.put(EVENT_HA_RESOURCE_CONFIGURE, HAConfig.class);
entityEventDetails.put(EVENT_HA_STATE_TRANSITION, HAConfig.class);
// Maintenance
entityEventDetails.put(EVENT_MAINTENANCE_CANCEL, Host.class);
entityEventDetails.put(EVENT_MAINTENANCE_CANCEL_PRIMARY_STORAGE, Host.class);

View File

@ -16,19 +16,19 @@
// under the License.
package com.cloud.host;
import java.util.Date;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.api.InternalIdentity;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState;
import com.cloud.utils.fsm.StateObject;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.kernel.Partition;
import java.util.Date;
/**
* Host represents one particular host server.
*/
public interface Host extends StateObject<Status>, Identity, InternalIdentity {
public interface Host extends StateObject<Status>, Identity, Partition, HAResource {
public enum Type {
Storage(false), Routing(false), SecondaryStorage(false), SecondaryStorageCmdExecutor(false), ConsoleProxy(true), ExternalFirewall(false), ExternalLoadBalancer(
false), ExternalVirtualSwitchSupervisor(false), PxeServer(false), BaremetalPxe(false), BaremetalDhcp(false), TrafficMonitor(false), NetScalerControlCenter(false),
@ -202,5 +202,7 @@ public interface Host extends StateObject<Status>, Identity, InternalIdentity {
boolean isInMaintenanceStates();
boolean isDisabled();
ResourceState getResourceState();
}

View File

@ -150,12 +150,14 @@ public enum Status {
s_fsm.addTransition(Status.Down, Event.ManagementServerDown, Status.Down);
s_fsm.addTransition(Status.Down, Event.AgentDisconnected, Status.Down);
s_fsm.addTransition(Status.Down, Event.PingTimeout, Status.Down);
s_fsm.addTransition(Status.Down, Event.HostDown, Status.Down);
s_fsm.addTransition(Status.Alert, Event.AgentConnected, Status.Connecting);
s_fsm.addTransition(Status.Alert, Event.Ping, Status.Up);
s_fsm.addTransition(Status.Alert, Event.Remove, Status.Removed);
s_fsm.addTransition(Status.Alert, Event.ManagementServerDown, Status.Alert);
s_fsm.addTransition(Status.Alert, Event.AgentDisconnected, Status.Alert);
s_fsm.addTransition(Status.Alert, Event.ShutdownRequested, Status.Disconnected);
s_fsm.addTransition(Status.Alert, Event.HostDown, Status.Down);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceFailed, Status.Disconnected);
s_fsm.addTransition(Status.Rebalancing, Event.RebalanceCompleted, Status.Connecting);
s_fsm.addTransition(Status.Rebalancing, Event.ManagementServerDown, Status.Disconnected);

View File

@ -16,13 +16,11 @@
// under the License.
package com.cloud.org;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.api.InternalIdentity;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Managed.ManagedState;
import org.apache.cloudstack.kernel.Partition;
public interface Cluster extends Grouping, InternalIdentity, Identity {
public interface Cluster extends Grouping, Partition {
public static enum ClusterType {
CloudManaged, ExternalManaged;
};

View File

@ -93,6 +93,7 @@ public enum ResourceState {
s_fsm.addTransition(ResourceState.Enabled, Event.InternalCreated, ResourceState.Enabled);
s_fsm.addTransition(ResourceState.Enabled, Event.Disable, ResourceState.Disabled);
s_fsm.addTransition(ResourceState.Enabled, Event.AdminAskMaintenace, ResourceState.PrepareForMaintenance);
s_fsm.addTransition(ResourceState.Enabled, Event.InternalEnterMaintenance, ResourceState.Maintenance);
s_fsm.addTransition(ResourceState.Disabled, Event.Enable, ResourceState.Enabled);
s_fsm.addTransition(ResourceState.Disabled, Event.Disable, ResourceState.Disabled);
s_fsm.addTransition(ResourceState.Disabled, Event.InternalCreated, ResourceState.Disabled);
@ -109,5 +110,7 @@ public enum ResourceState {
s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.InternalEnterMaintenance, ResourceState.Maintenance);
s_fsm.addTransition(ResourceState.ErrorInMaintenance, Event.AdminCancelMaintenance, ResourceState.Enabled);
s_fsm.addTransition(ResourceState.Error, Event.InternalCreated, ResourceState.Error);
s_fsm.addTransition(ResourceState.Disabled, Event.DeleteHost, ResourceState.Disabled);
}
}

View File

@ -16,26 +16,24 @@
// under the License.
package com.cloud.vm;
import java.util.Arrays;
import java.util.Date;
import java.util.Map;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.api.Displayable;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.api.InternalIdentity;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.utils.fsm.StateMachine2.Transition;
import com.cloud.utils.fsm.StateMachine2.Transition.Impact;
import com.cloud.utils.fsm.StateObject;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.api.Displayable;
import org.apache.cloudstack.kernel.Partition;
import java.util.Arrays;
import java.util.Date;
import java.util.Map;
/**
* VirtualMachine describes the properties held by a virtual machine
*
*/
public interface VirtualMachine extends RunningOn, ControlledEntity, Identity, InternalIdentity, Displayable, StateObject<VirtualMachine.State> {
public interface VirtualMachine extends RunningOn, ControlledEntity, Partition, Displayable, StateObject<VirtualMachine.State> {
public enum PowerState {
PowerUnknown,

View File

@ -16,12 +16,12 @@
// under the License.
package org.apache.cloudstack.alert;
import java.util.HashSet;
import java.util.Set;
import com.cloud.capacity.Capacity;
import com.cloud.exception.InvalidParameterValueException;
import java.util.HashSet;
import java.util.Set;
public interface AlertService {
public static class AlertType {
private static Set<AlertType> defaultAlertTypes = new HashSet<AlertType>();
@ -67,6 +67,7 @@ public interface AlertService {
public static final AlertType ALERT_TYPE_SYNC = new AlertType((short)27, "ALERT.TYPE.SYNC", true);
public static final AlertType ALERT_TYPE_UPLOAD_FAILED = new AlertType((short)28, "ALERT.UPLOAD.FAILED", true);
public static final AlertType ALERT_TYPE_OOBM_AUTH_ERROR = new AlertType((short)29, "ALERT.OOBM.AUTHERROR", true);
public static final AlertType ALERT_TYPE_HA_ACTION = new AlertType((short)30, "ALERT.HA.ACTION", true);
public static final AlertType ALERT_TYPE_CA_CERT = new AlertType((short)31, "ALERT.CA.CERT", true);
public short getType() {

View File

@ -21,6 +21,7 @@ public class ApiConstants {
public static final String ACCOUNTS = "accounts";
public static final String ACCOUNT_TYPE = "accounttype";
public static final String ACCOUNT_ID = "accountid";
public static final String ACTIVITY = "activity";
public static final String ADDRESS = "address";
public static final String ALGORITHM = "algorithm";
public static final String ALLOCATED_ONLY = "allocatedonly";
@ -98,6 +99,7 @@ public class ApiConstants {
public static final String DOMAIN_ID = "domainid";
public static final String DOMAIN__ID = "domainId";
public static final String DURATION = "duration";
public static final String ELIGIBLE = "eligible";
public static final String EMAIL = "email";
public static final String END_DATE = "enddate";
public static final String END_IP = "endip";
@ -105,6 +107,7 @@ public class ApiConstants {
public static final String END_PORT = "endport";
public static final String ENTRY_TIME = "entrytime";
public static final String EXPIRES = "expires";
public static final String FENCE = "fence";
public static final String FETCH_LATEST = "fetchlatest";
public static final String FIRSTNAME = "firstname";
public static final String FORCED = "forced";
@ -123,6 +126,9 @@ public class ApiConstants {
public static final String GUEST_CIDR_ADDRESS = "guestcidraddress";
public static final String GUEST_VLAN_RANGE = "guestvlanrange";
public static final String HA_ENABLE = "haenable";
public static final String HA_PROVIDER = "haprovider";
public static final String HA_STATE = "hastate";
public static final String HEALTH = "health";
public static final String HOST_ID = "hostid";
public static final String HOST_NAME = "hostname";
public static final String HYPERVISOR = "hypervisor";
@ -220,6 +226,7 @@ public class ApiConstants {
public static final String PUBLIC_ZONE = "publiczone";
public static final String RECEIVED_BYTES = "receivedbytes";
public static final String RECONNECT = "reconnect";
public static final String RECOVER = "recover";
public static final String REQUIRES_HVM = "requireshvm";
public static final String RESOURCE_TYPE = "resourcetype";
public static final String RESPONSE = "response";

View File

@ -0,0 +1,127 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostHAResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import org.apache.cloudstack.ha.HAResource;
import javax.inject.Inject;
/**
 * API command that configures the HA provider for a single host.
 *
 * <p>Authorized for {@code RoleType.Admin} only. The host is resolved from the
 * {@code hostid} parameter and the requested provider name is passed to
 * {@code HAConfigManager.configureHA} for a resource of type
 * {@code HAResource.ResourceType.Host}.
 */
@APICommand(name = ConfigureHAForHostCmd.APINAME, description = "Configures HA for a host",
        responseObject = HostHAResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        since = "4.11", authorized = {RoleType.Admin})
public final class ConfigureHAForHostCmd extends BaseAsyncCmd {
    public static final String APINAME = "configureHAForHost";

    @Inject
    private HAConfigManager haConfigManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
            description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long hostId;

    @Parameter(name = ApiConstants.PROVIDER, type = CommandType.STRING,
            description = "HA provider", required = true, validations = {ApiArgValidator.NotNullOrEmpty})
    private String haProvider;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /** @return the internal ID of the host to configure */
    public Long getHostId() {
        return hostId;
    }

    /** @return the HA provider name exactly as supplied in the request */
    public String getHaProvider() {
        return haProvider;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /** @return the lower-cased API name followed by {@code BaseCmd.RESPONSE_SUFFIX} */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /** @return {@code Account.ACCOUNT_ID_SYSTEM}; this command is always owned by the system account */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /**
     * Builds the {@link HostHAResponse} for this command and registers it as the response object.
     *
     * @param result       outcome of the configure call, reported as the response status
     * @param resourceUuid UUID of the host that was configured
     */
    private void setupResponse(final boolean result, final String resourceUuid) {
        final HostHAResponse response = new HostHAResponse();
        response.setId(resourceUuid);
        response.setProvider(getHaProvider().toLowerCase());
        // Report the outcome, in line with DisableHAForHostCmd; previously 'result' was silently dropped.
        response.setStatus(result);
        response.setResponseName(getCommandName());
        setResponseObject(response);
    }

    /**
     * Looks up the host, configures its HA provider and publishes the response.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when the host cannot be found, or with
     *                            {@code INTERNAL_ERROR} when the HA configuration call reports failure
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Host host = _resourceService.getHost(getHostId());
        if (host == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
        }

        final boolean result = haConfigManager.configureHA(host.getId(), HAResource.ResourceType.Host, getHaProvider());
        if (!result) {
            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to configure HA provider for the host");
        }

        CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA configured with provider: " + getHaProvider());
        CallContext.current().putContextParameter(Host.class, host.getUuid());

        setupResponse(result, host.getUuid());
    }

    /** @return the HA "configure" event type, so this operation is not logged as a disable event */
    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_CONFIGURE;
    }

    @Override
    public String getEventDescription() {
        return "configure HA for host: " + getHostId();
    }
}

View File

@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.org.Cluster;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.ClusterResponse;
import org.apache.cloudstack.api.response.SuccessResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import javax.inject.Inject;
/**
 * API command that disables HA for a whole cluster.
 *
 * <p>Authorized for {@code RoleType.Admin} only. Responds with a
 * {@link SuccessResponse} whose success flag is the value returned by
 * {@code HAConfigManager.disableHA} for the resolved cluster.
 */
@APICommand(name = DisableHAForClusterCmd.APINAME, description = "Disables HA cluster-wide",
        responseObject = SuccessResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        since = "4.11", authorized = {RoleType.Admin})
public final class DisableHAForClusterCmd extends BaseAsyncCmd {
    public static final String APINAME = "disableHAForCluster";

    @Inject
    private HAConfigManager haConfigManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.CLUSTER_ID, type = BaseCmd.CommandType.UUID, entityType = ClusterResponse.class,
            description = "ID of the cluster", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long clusterId;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /** @return the internal ID of the cluster named in the request */
    public Long getClusterId() {
        return clusterId;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_DISABLE;
    }

    @Override
    public String getEventDescription() {
        return "disable HA for cluster: " + getClusterId();
    }

    /** @return the ID of the account that issued the call */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }

    /** @return the lower-cased API name followed by {@code BaseCmd.RESPONSE_SUFFIX} */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * Resolves the cluster, disables HA on it, records event details and
     * publishes a success response.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when no cluster matches the given ID
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Cluster targetCluster = _resourceService.getCluster(getClusterId());
        if (targetCluster == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find cluster by ID: " + getClusterId());
        }

        final boolean disabled = haConfigManager.disableHA(targetCluster);

        final CallContext context = CallContext.current();
        context.setEventDetails("Cluster Id:" + targetCluster.getId() + " HA enabled: false");
        context.putContextParameter(Cluster.class, targetCluster.getUuid());

        // Response is assembled inline; it carries only the success flag under the "ha" object name.
        final SuccessResponse successResponse = new SuccessResponse();
        successResponse.setSuccess(disabled);
        successResponse.setResponseName(getCommandName());
        successResponse.setObjectName("ha");
        setResponseObject(successResponse);
    }
}

View File

@ -0,0 +1,116 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostHAResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import org.apache.cloudstack.ha.HAResource;
import javax.inject.Inject;
/**
 * API command that disables HA for a single host.
 *
 * <p>Authorized for {@code RoleType.Admin} only. Responds with a
 * {@link HostHAResponse} carrying the host UUID, {@code enabled = false} and
 * the value returned by {@code HAConfigManager.disableHA} as status.
 */
@APICommand(name = DisableHAForHostCmd.APINAME, description = "Disables HA for a host",
        responseObject = HostHAResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        since = "4.11", authorized = {RoleType.Admin})
public final class DisableHAForHostCmd extends BaseAsyncCmd {
    public static final String APINAME = "disableHAForHost";

    @Inject
    private HAConfigManager haConfigManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
            description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long hostId;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /** @return the internal ID of the host named in the request */
    public Long getHostId() {
        return hostId;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_DISABLE;
    }

    @Override
    public String getEventDescription() {
        return "disable HA for host: " + getHostId();
    }

    /** @return the lower-cased API name followed by {@code BaseCmd.RESPONSE_SUFFIX} */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /** @return the ID of the account that issued the call */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }

    /**
     * Resolves the host, disables HA on it, records event details and
     * publishes the response.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when no host matches the given ID
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Host targetHost = _resourceService.getHost(getHostId());
        if (targetHost == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
        }

        final boolean disabled = haConfigManager.disableHA(targetHost.getId(), HAResource.ResourceType.Host);

        final CallContext context = CallContext.current();
        context.setEventDetails("Host Id:" + targetHost.getId() + " HA enabled: false");
        context.putContextParameter(Host.class, targetHost.getUuid());

        publishResponse(targetHost.getUuid(), disabled);
    }

    /**
     * Fills a {@link HostHAResponse} and registers it as this command's response object.
     *
     * @param hostUuid UUID of the host HA was disabled for
     * @param status   value returned by the disable call
     */
    private void publishResponse(final String hostUuid, final boolean status) {
        final HostHAResponse haResponse = new HostHAResponse();
        haResponse.setId(hostUuid);
        haResponse.setEnabled(false);
        haResponse.setStatus(status);
        haResponse.setResponseName(getCommandName());
        setResponseObject(haResponse);
    }
}

View File

@ -0,0 +1,115 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.dc.DataCenter;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.SuccessResponse;
import org.apache.cloudstack.api.response.ZoneResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import javax.inject.Inject;
/**
 * API command that disables HA for a zone.
 *
 * <p>Authorized for {@code RoleType.Admin} only. Responds with a
 * {@link SuccessResponse} whose success flag is the value returned by
 * {@code HAConfigManager.disableHA} for the resolved zone.
 */
@APICommand(name = DisableHAForZoneCmd.APINAME, description = "Disables HA for a zone",
        responseObject = SuccessResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        since = "4.11", authorized = {RoleType.Admin})
public final class DisableHAForZoneCmd extends BaseAsyncCmd {
    public static final String APINAME = "disableHAForZone";

    @Inject
    private HAConfigManager haConfigManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.ZONE_ID, type = BaseCmd.CommandType.UUID, entityType = ZoneResponse.class,
            description = "ID of the zone", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long zoneId;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /** @return the internal ID of the zone named in the request */
    public Long getZoneId() {
        return zoneId;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_DISABLE;
    }

    @Override
    public String getEventDescription() {
        return "disable HA for zone: " + getZoneId();
    }

    /** @return the ID of the account that issued the call */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }

    /** @return the lower-cased API name followed by {@code BaseCmd.RESPONSE_SUFFIX} */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * Resolves the zone, disables HA on it, records event details and
     * publishes a success response.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when no zone matches the given ID
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final DataCenter zone = _resourceService.getZone(getZoneId());
        if (zone == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find zone by ID: " + getZoneId());
        }

        final boolean disabled = haConfigManager.disableHA(zone);

        final CallContext context = CallContext.current();
        context.setEventDetails("Zone Id:" + zone.getId() + " HA enabled: false");
        context.putContextParameter(DataCenter.class, zone.getUuid());

        // Response is assembled inline; it carries only the success flag under the "ha" object name.
        final SuccessResponse successResponse = new SuccessResponse();
        successResponse.setSuccess(disabled);
        successResponse.setResponseName(getCommandName());
        successResponse.setObjectName("ha");
        setResponseObject(successResponse);
    }
}

View File

@ -0,0 +1,114 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.org.Cluster;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.ClusterResponse;
import org.apache.cloudstack.api.response.SuccessResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import javax.inject.Inject;
/**
 * Asynchronous, admin-only API command that asks the {@link HAConfigManager}
 * to enable HA for a cluster and reports the outcome as a {@link SuccessResponse}.
 */
@APICommand(name = EnableHAForClusterCmd.APINAME,
        description = "Enables HA cluster-wide",
        responseObject = SuccessResponse.class,
        requestHasSensitiveInfo = false,
        responseHasSensitiveInfo = false,
        since = "4.11",
        authorized = {RoleType.Admin})
public final class EnableHAForClusterCmd extends BaseAsyncCmd {

    /** API name of this command. */
    public static final String APINAME = "enableHAForCluster";

    @Inject
    private HAConfigManager haConfigManager;

    // ---- API parameters ----

    @Parameter(name = ApiConstants.CLUSTER_ID,
            type = BaseCmd.CommandType.UUID,
            entityType = ClusterResponse.class,
            description = "ID of the cluster",
            required = true,
            validations = {ApiArgValidator.PositiveNumber})
    private Long clusterId;

    // ---- Accessors ----

    /**
     * @return the cluster ID supplied with the request
     */
    public Long getClusterId() {
        return clusterId;
    }

    // ---- API implementation ----

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        final String lowerCasedName = APINAME.toLowerCase();
        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the account ID of the caller taken from the current call context
     */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }

    /**
     * Wraps the operation outcome in a {@link SuccessResponse} and installs it
     * as this command's response object.
     *
     * @param succeeded value returned by the HA manager
     */
    private void publishOutcome(final boolean succeeded) {
        final SuccessResponse outcome = new SuccessResponse();
        outcome.setSuccess(succeeded);
        outcome.setResponseName(getCommandName());
        outcome.setObjectName("ha");
        setResponseObject(outcome);
    }

    /**
     * Looks up the requested cluster, enables HA on it, records the event
     * details on the call context and sets the response.
     *
     * @throws ServerApiException with {@link ApiErrorCode#PARAM_ERROR} when no
     *         cluster matches the supplied ID
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Cluster target = _resourceService.getCluster(getClusterId());
        if (target == null) {
            final String message = "Unable to find cluster by ID: " + getClusterId();
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, message);
        }

        final boolean succeeded = haConfigManager.enableHA(target);

        final String eventDetails = "Cluster Id:" + target.getId() + " HA enabled: true";
        CallContext.current().setEventDetails(eventDetails);
        CallContext.current().putContextParameter(Cluster.class, target.getUuid());

        publishOutcome(succeeded);
    }

    /**
     * @return {@link EventTypes#EVENT_HA_RESOURCE_ENABLE}
     */
    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_ENABLE;
    }

    /**
     * @return the event description, ending with the requested cluster ID
     */
    @Override
    public String getEventDescription() {
        final Long requestedId = getClusterId();
        return "enable HA for cluster: " + requestedId;
    }
}

View File

@ -0,0 +1,116 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostHAResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import org.apache.cloudstack.ha.HAResource;
import javax.inject.Inject;
@APICommand(name = EnableHAForHostCmd.APINAME, description = "Enables HA for a host",
responseObject = HostHAResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
since = "4.11", authorized = {RoleType.Admin})
public final class EnableHAForHostCmd extends BaseAsyncCmd {
public static final String APINAME = "enableHAForHost";
@Inject
private HAConfigManager haConfigManager;
/////////////////////////////////////////////////////
//////////////// API parameters /////////////////////
/////////////////////////////////////////////////////
@Parameter(name = ApiConstants.HOST_ID, type = CommandType.UUID, entityType = HostResponse.class,
description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
private Long hostId;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
public Long getHostId() {
return hostId;
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////
@Override
public String getCommandName() {
return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
}
@Override
public long getEntityOwnerId() {
return CallContext.current().getCallingAccountId();
}
private void setupResponse(final boolean result, final String resourceUuid) {
final HostHAResponse response = new HostHAResponse();
response.setId(resourceUuid);
response.setEnabled(true);
response.setStatus(result);
response.setResponseName(getCommandName());
setResponseObject(response);
}
@Override
public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
final Host host = _resourceService.getHost(getHostId());
if (host == null) {
throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
}
final boolean result = haConfigManager.enableHA(host.getId(), HAResource.ResourceType.Host);
CallContext.current().setEventDetails("Host Id:" + host.getId() + " HA enabled: true");
CallContext.current().putContextParameter(Host.class, host.getUuid());
setupResponse(result, host.getUuid());
}
@Override
public String getEventType() {
return EventTypes.EVENT_HA_RESOURCE_ENABLE;
}
@Override
public String getEventDescription() {
return "enable HA for host: " + getHostId();
}
}

View File

@ -0,0 +1,115 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.dc.DataCenter;
import com.cloud.event.EventTypes;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseAsyncCmd;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.SuccessResponse;
import org.apache.cloudstack.api.response.ZoneResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAConfigManager;
import javax.inject.Inject;
/**
 * Asynchronous, admin-only API command that asks the {@link HAConfigManager}
 * to enable HA for a zone and reports the outcome as a {@link SuccessResponse}.
 */
@APICommand(name = EnableHAForZoneCmd.APINAME,
        description = "Enables HA for a zone",
        responseObject = SuccessResponse.class,
        requestHasSensitiveInfo = false,
        responseHasSensitiveInfo = false,
        since = "4.11",
        authorized = {RoleType.Admin})
public final class EnableHAForZoneCmd extends BaseAsyncCmd {

    /** API name of this command. */
    public static final String APINAME = "enableHAForZone";

    @Inject
    private HAConfigManager haConfigManager;

    // ---- API parameters ----

    @Parameter(name = ApiConstants.ZONE_ID,
            type = BaseCmd.CommandType.UUID,
            entityType = ZoneResponse.class,
            description = "ID of the zone",
            required = true,
            validations = {ApiArgValidator.PositiveNumber})
    private Long zoneId;

    // ---- Accessors ----

    /**
     * @return the zone ID supplied with the request
     */
    public Long getZoneId() {
        return zoneId;
    }

    // ---- API implementation ----

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        final String lowerCasedName = APINAME.toLowerCase();
        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the account ID of the caller taken from the current call context
     */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }

    /**
     * Wraps the operation outcome in a {@link SuccessResponse} and installs it
     * as this command's response object.
     *
     * @param succeeded value returned by the HA manager
     */
    private void publishOutcome(final boolean succeeded) {
        final SuccessResponse outcome = new SuccessResponse();
        outcome.setSuccess(succeeded);
        outcome.setResponseName(getCommandName());
        outcome.setObjectName("ha");
        setResponseObject(outcome);
    }

    /**
     * Looks up the requested zone, enables HA on it, records the event
     * details on the call context and sets the response.
     *
     * @throws ServerApiException with {@link ApiErrorCode#PARAM_ERROR} when no
     *         zone matches the supplied ID
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final DataCenter target = _resourceService.getZone(getZoneId());
        if (target == null) {
            final String message = "Unable to find zone by ID: " + getZoneId();
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, message);
        }

        final boolean succeeded = haConfigManager.enableHA(target);

        final String eventDetails = "Zone Id:" + target.getId() + " HA enabled: true";
        CallContext.current().setEventDetails(eventDetails);
        CallContext.current().putContextParameter(DataCenter.class, target.getUuid());

        publishOutcome(succeeded);
    }

    /**
     * @return {@link EventTypes#EVENT_HA_RESOURCE_ENABLE}
     */
    @Override
    public String getEventType() {
        return EventTypes.EVENT_HA_RESOURCE_ENABLE;
    }

    /**
     * @return the event description, ending with the requested zone ID
     */
    @Override
    public String getEventDescription() {
        final Long requestedId = getZoneId();
        return "enable HA for zone: " + requestedId;
    }
}

View File

@ -0,0 +1,106 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.user.Account;
import com.google.common.base.Enums;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HAProviderResponse;
import org.apache.cloudstack.api.response.HostHAResponse;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.ha.HAConfigManager;
import org.apache.cloudstack.ha.HAResource;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
/**
 * Admin-only API command that lists the names of the HA providers available
 * for hosts of a given hypervisor type.
 *
 * The response is a {@link ListResponse} of {@link HAProviderResponse} items,
 * which is why the annotation declares {@code HAProviderResponse} as the
 * response object.
 */
@APICommand(name = ListHostHAProvidersCmd.APINAME, description = "Lists HA providers", responseObject = HAProviderResponse.class,
        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
        since = "4.11", authorized = {RoleType.Admin})
public final class ListHostHAProvidersCmd extends BaseCmd {

    /** API name of this command. */
    public static final String APINAME = "listHostHAProviders";

    @Inject
    private HAConfigManager haConfigManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.HYPERVISOR, type = CommandType.STRING, required = true,
            description = "Hypervisor type of the resource", validations = {ApiArgValidator.NotNullOrEmpty})
    private String hypervisorType;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /**
     * Converts the raw hypervisor parameter into a resource sub-type.
     *
     * @return the matching {@link HAResource.ResourceSubType}
     * @throws IllegalArgumentException if the parameter does not name a
     *         sub-type constant; {@link #execute()} validates the value
     *         before calling this accessor
     */
    public HAResource.ResourceSubType getHypervisorType() {
        return HAResource.ResourceSubType.valueOf(hypervisorType);
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the system account ID
     */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /**
     * Wraps each provider name in a {@link HAProviderResponse} and installs the
     * resulting list as this command's response object.
     *
     * @param hostHAProviderList provider names returned by the HA manager
     */
    private void setupResponse(final List<String> hostHAProviderList) {
        final ListResponse<HAProviderResponse> response = new ListResponse<>();
        final List<HAProviderResponse> hostHAResponses = new ArrayList<>();
        for (final String provider : hostHAProviderList) {
            final HAProviderResponse haProviderResponse = new HAProviderResponse();
            haProviderResponse.setProvider(provider);
            hostHAResponses.add(haProviderResponse);
        }
        response.setResponses(hostHAResponses);
        response.setResponseName(getCommandName());
        setResponseObject(response);
    }

    /**
     * Validates the hypervisor parameter, queries the HA manager for the host
     * HA providers of that hypervisor type and sets the list response.
     *
     * @throws ServerApiException with {@link ApiErrorCode#PARAM_ERROR} when the
     *         hypervisor parameter does not name a known resource sub-type
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        // Reject unknown values up front so the caller gets a parameter error
        // instead of the IllegalArgumentException valueOf() would raise.
        if (!Enums.getIfPresent(HAResource.ResourceSubType.class, hypervisorType).isPresent()) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Invalid or unsupported host hypervisor type provided. Supported types are: " + Arrays.toString(HAResource.ResourceSubType.values()));
        }
        final List<String> hostHAProviders = haConfigManager.listHAProviders(HAResource.ResourceType.Host, getHypervisorType());
        setupResponse(hostHAProviders);
    }
}

View File

@ -0,0 +1,109 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.ha;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import com.cloud.user.Account;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostHAResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAConfigManager;
import org.apache.cloudstack.ha.HAResource;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.List;
/**
 * Admin-only API command that lists host HA configurations, optionally
 * restricted to a single host, as {@link HostHAResponse} items.
 */
@APICommand(name = ListHostHAResourcesCmd.APINAME,
        description = "Lists host HA resources",
        responseObject = HostHAResponse.class,
        requestHasSensitiveInfo = false,
        responseHasSensitiveInfo = false,
        since = "4.11",
        authorized = {RoleType.Admin})
public final class ListHostHAResourcesCmd extends BaseCmd {

    /** API name of this command. */
    public static final String APINAME = "listHostHAResources";

    @Inject
    private HAConfigManager haConfigManager;

    // ---- API parameters ----

    @Parameter(name = ApiConstants.HOST_ID,
            type = CommandType.UUID,
            entityType = HostResponse.class,
            description = "List by host ID",
            validations = {ApiArgValidator.PositiveNumber})
    private Long hostId;

    // ---- Accessors ----

    /**
     * @return the host ID filter; the parameter is not marked required, so it
     *         may be absent
     */
    public Long getHostId() {
        return hostId;
    }

    // ---- API implementation ----

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        final String lowerCasedName = APINAME.toLowerCase();
        return lowerCasedName + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the system account ID
     */
    @Override
    public long getEntityOwnerId() {
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /**
     * Builds the response item for one host and its HA configuration.
     *
     * @param owner    host the configuration belongs to
     * @param haConfig HA configuration to describe
     * @return the populated response item
     */
    private HostHAResponse describe(final Host owner, final HAConfig haConfig) {
        final HostHAResponse item = new HostHAResponse();
        item.setId(owner.getUuid());
        item.setEnabled(haConfig.isEnabled());
        item.setHaState(haConfig.getState());
        item.setProvider(haConfig.getHaProvider());
        return item;
    }

    /**
     * Converts the HA configurations into response items and installs the
     * resulting list as this command's response object. Configurations whose
     * host cannot be found are left out.
     *
     * @param hostHAConfigList configurations returned by the HA manager
     */
    private void setupResponse(final List<HAConfig> hostHAConfigList) {
        final List<HostHAResponse> items = new ArrayList<>();
        for (final HAConfig haConfig : hostHAConfigList) {
            final Host owner = _resourceService.getHost(haConfig.getResourceId());
            if (owner != null) {
                items.add(describe(owner, haConfig));
            }
        }

        final ListResponse<HostHAResponse> listing = new ListResponse<>();
        listing.setResponses(items);
        listing.setResponseName(getCommandName());
        setResponseObject(listing);
    }

    /**
     * Fetches the host HA configurations from the HA manager and sets the
     * list response.
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Long hostFilter = getHostId();
        setupResponse(haConfigManager.listHAResources(hostFilter, HAResource.ResourceType.Host));
    }
}

View File

@ -108,4 +108,8 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd {
throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to prepare host for maintenance");
}
}
/**
 * Sets the host this command operates on.
 *
 * @param hostId value stored in the command's {@code id} field (declared
 *               outside this hunk)
 */
public void setHostId(final Long hostId) {
    this.id = hostId;
}
}

View File

@ -0,0 +1,58 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.response;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.api.EntityReference;
import org.apache.cloudstack.ha.HAConfig;
import java.util.List;
/**
 * API response item describing one HA provider. The object name of the
 * response is {@code haprovider}.
 */
@EntityReference(value = HAConfig.class)
public final class HAProviderResponse extends BaseResponse {

    @SerializedName(ApiConstants.HA_PROVIDER)
    @Param(description = "the HA provider")
    private String provider;

    @SerializedName(ApiConstants.TYPE)
    @Param(description = "the HA provider resource type detail")
    private List<String> supportedResourceTypes;

    /** Creates an empty response named {@code haprovider}. */
    public HAProviderResponse() {
        super("haprovider");
    }

    /**
     * @return the HA provider name
     */
    public String getProvider() {
        return this.provider;
    }

    /**
     * @param provider the HA provider name
     */
    public void setProvider(String provider) {
        this.provider = provider;
    }

    /**
     * @return the resource types stored on this response; the same list
     *         instance that was set, not a copy
     */
    public List<String> getSupportedResourceTypes() {
        return this.supportedResourceTypes;
    }

    /**
     * @param supportedResourceTypes the resource types to report; stored by
     *                               reference
     */
    public void setSupportedResourceTypes(List<String> supportedResourceTypes) {
        this.supportedResourceTypes = supportedResourceTypes;
    }
}

View File

@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.response;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.api.EntityReference;
import org.apache.cloudstack.ha.HAConfig;
/**
 * API response describing the HA configuration of a host. The object name of
 * the response is {@code hostha}.
 */
@EntityReference(value = HAConfig.class)
public final class HostHAResponse extends BaseResponse {

    @SerializedName(ApiConstants.HOST_ID)
    @Param(description = "the ID of the host")
    private String id;

    @SerializedName(ApiConstants.HA_ENABLE)
    @Param(description = "if host HA is enabled for the host")
    private Boolean enabled;

    @SerializedName(ApiConstants.HA_STATE)
    @Param(description = "the HA state of the host")
    private HAConfig.HAState haState;

    @SerializedName(ApiConstants.HA_PROVIDER)
    @Param(description = "the host HA provider")
    private String provider;

    @SerializedName(ApiConstants.STATUS)
    @Param(description = "operation status")
    private Boolean status;

    /** Creates an empty response named {@code hostha}. */
    public HostHAResponse() {
        super("hostha");
    }

    /**
     * Creates a response from an HA configuration.
     *
     * @param config the configuration to copy provider, enabled flag and state
     *               from; when {@code null} the response reports HA as not
     *               enabled and in the {@code Disabled} state
     */
    public HostHAResponse(final HAConfig config) {
        this();
        if (config != null) {
            this.provider = config.getHaProvider();
            this.enabled = config.isEnabled();
            this.haState = config.getState();
        } else {
            this.enabled = false;
            this.haState = HAConfig.HAState.Disabled;
        }
    }

    /**
     * @return the host ID
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the host ID
     */
    public void setId(String id) {
        this.id = id;
    }

    /**
     * @return whether host HA is enabled, or {@code null} if never set
     */
    public Boolean getEnabled() {
        return this.enabled;
    }

    /**
     * @param enabled whether host HA is enabled
     */
    public void setEnabled(Boolean enabled) {
        this.enabled = enabled;
    }

    /**
     * @return the HA state of the host
     */
    public HAConfig.HAState getHaState() {
        return this.haState;
    }

    /**
     * @param haState the HA state of the host
     */
    public void setHaState(HAConfig.HAState haState) {
        this.haState = haState;
    }

    /**
     * @return the host HA provider name
     */
    public String getProvider() {
        return this.provider;
    }

    /**
     * @param provider the host HA provider name
     */
    public void setProvider(String provider) {
        this.provider = provider;
    }

    /**
     * @return the operation status, or {@code null} if never set
     */
    public Boolean getStatus() {
        return this.status;
    }

    /**
     * @param status the operation status
     */
    public void setStatus(Boolean status) {
        this.status = status;
    }
}

View File

@ -24,6 +24,7 @@ import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.api.EntityReference;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement;
import java.util.Date;
@ -206,6 +207,10 @@ public class HostResponse extends BaseResponse {
@Param(description = "true if this host is suitable(has enough capacity and satisfies all conditions like hosttags, max guests vm limit etc) to migrate a VM to it , false otherwise")
private Boolean suitableForMigration;
// Host HA details of this host, serialized under the "hostha" key.
@SerializedName("hostha")
@Param(description = "the host HA information")
private HostHAResponse hostHAResponse;
// Out-of-band management details of this host, serialized under the
// "outofbandmanagement" key.
@SerializedName("outofbandmanagement")
@Param(description = "the host out-of-band management information")
private OutOfBandManagementResponse outOfBandManagementResponse;
@ -413,6 +418,14 @@ public class HostResponse extends BaseResponse {
this.suitableForMigration = suitableForMigration;
}
/**
 * @return the host HA details attached to this response, or {@code null}
 *         if none were set
 */
public HostHAResponse getHostHAResponse() {
    return this.hostHAResponse;
}
/**
 * Attaches host HA details built from the given configuration.
 *
 * @param config HA configuration passed to the {@link HostHAResponse}
 *               constructor
 */
public void setHostHAResponse(final HAConfig config) {
    final HostHAResponse haDetails = new HostHAResponse(config);
    this.hostHAResponse = haDetails;
}
/**
 * @return the out-of-band management details attached to this response
 */
public OutOfBandManagementResponse getOutOfBandManagementResponse() {
    return this.outOfBandManagementResponse;
}

View File

@ -0,0 +1,142 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package org.apache.cloudstack.ha;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.utils.fsm.StateObject;
import org.apache.cloudstack.api.InternalIdentity;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
/**
 * HA configuration of a resource, together with the events and states of the
 * state machine that tracks the resource's HA state.
 */
public interface HAConfig extends StateObject<HAConfig.HAState>, InternalIdentity {
/** @return the ID of the resource this configuration belongs to */
long getResourceId();
/** @return the type of the resource this configuration belongs to */
HAResource.ResourceType getResourceType();
/** @return whether HA is enabled for the resource */
boolean isEnabled();
/** @return the current HA state of the resource */
HAState getState();
/** @return the name of the HA provider configured for the resource */
String getHaProvider();
/** @return the management server ID recorded on this configuration */
Long getManagementServerId();
/**
 * Events that trigger transitions between {@link HAState} values.
 */
enum Event {
Eligible,
Ineligible,
Disabled,
Enabled,
HealthCheckPassed,
HealthCheckFailed,
PerformActivityCheck,
TooFewActivityCheckSamples,
PeriodicRecheckResourceActivity,
ActivityCheckFailureOverThresholdRatio,
ActivityCheckFailureUnderThresholdRatio,
PowerCycle,
Recovered,
RecoveryWaitPeriodTimeout,
RecoveryOperationThresholdExceeded,
Fenced;
/**
 * @return the ID of the management server node this code runs on, as
 *         reported by {@link ManagementServerNode#getManagementServerId()};
 *         the value is the same for every event constant
 */
public Long getServerId() {
// TODO: change in future if we've better claim & ownership
// Right now the first one to update the db wins
// and mgmt server id would eventually become consistent
return ManagementServerNode.getManagementServerId();
}
}
/**
 * HA states of a resource. Each state carries a human-readable description,
 * and the allowed transitions are registered in the static state machine.
 */
enum HAState {
Disabled("HA Operations disabled"),
Available("The resource is healthy"),
Ineligible("The current state does not support HA/recovery"),
Suspect("Most recent health check failed"),
Degraded("The resource cannot be managed, but services end user requests"),
Checking("The activity checks are currently being performed"),
Recovering("The resource is undergoing recovery operation"),
Recovered("The resource is recovered"),
Fencing("The resource is undergoing fence operation"),
Fenced("The resource is fenced");
// Human-readable description of the state, set once by the constructor.
String description;
HAState(String description) {
this.description = description;
}
/** @return the state machine shared by all HA configurations */
public static StateMachine2<HAState, Event, HAConfig> getStateMachine() {
return FSM;
}
/** @return the human-readable description of this state */
public String getDescription() {
return description;
}
// Declared after the enum constants and populated by the static block below.
private static final StateMachine2<HAState, Event, HAConfig> FSM = new StateMachine2<>();
static {
// Initial transitions: a configuration can start as Disabled, Available or Ineligible.
FSM.addInitialTransition(Event.Disabled, Disabled);
FSM.addInitialTransition(Event.Enabled, Available);
FSM.addInitialTransition(Event.Ineligible, Ineligible);
// From Disabled: only enabling leaves the state.
FSM.addTransition(Disabled, Event.Enabled, Available);
// From Ineligible.
FSM.addTransition(Ineligible, Event.Disabled, Disabled);
FSM.addTransition(Ineligible, Event.Ineligible, Ineligible);
FSM.addTransition(Ineligible, Event.Eligible, Available);
// From Available: a failed health check makes the resource Suspect.
FSM.addTransition(Available, Event.Disabled, Disabled);
FSM.addTransition(Available, Event.Ineligible, Ineligible);
FSM.addTransition(Available, Event.HealthCheckPassed, Available);
FSM.addTransition(Available, Event.HealthCheckFailed, Suspect);
// From Suspect: either recover via a passing health check or start activity checks.
FSM.addTransition(Suspect, Event.Disabled, Disabled);
FSM.addTransition(Suspect, Event.Ineligible, Ineligible);
FSM.addTransition(Suspect, Event.HealthCheckFailed, Suspect);
FSM.addTransition(Suspect, Event.PerformActivityCheck, Checking);
FSM.addTransition(Suspect, Event.HealthCheckPassed, Available);
// From Checking: the activity check result decides between Suspect, Degraded and Recovering.
FSM.addTransition(Checking, Event.Disabled, Disabled);
FSM.addTransition(Checking, Event.Ineligible, Ineligible);
FSM.addTransition(Checking, Event.TooFewActivityCheckSamples, Suspect);
FSM.addTransition(Checking, Event.ActivityCheckFailureUnderThresholdRatio, Degraded);
FSM.addTransition(Checking, Event.ActivityCheckFailureOverThresholdRatio, Recovering);
// From Degraded.
FSM.addTransition(Degraded, Event.Disabled, Disabled);
FSM.addTransition(Degraded, Event.Ineligible, Ineligible);
FSM.addTransition(Degraded, Event.HealthCheckFailed, Degraded);
FSM.addTransition(Degraded, Event.HealthCheckPassed, Available);
FSM.addTransition(Degraded, Event.PeriodicRecheckResourceActivity, Suspect);
// From Recovering: ends in Recovered, or in Fencing once the recovery threshold is exceeded.
FSM.addTransition(Recovering, Event.Disabled, Disabled);
FSM.addTransition(Recovering, Event.Ineligible, Ineligible);
FSM.addTransition(Recovering, Event.Recovered, Recovered);
FSM.addTransition(Recovering, Event.RecoveryOperationThresholdExceeded, Fencing);
// From Recovered: returns to Available after the recovery wait period.
FSM.addTransition(Recovered, Event.Disabled, Disabled);
FSM.addTransition(Recovered, Event.Ineligible, Ineligible);
FSM.addTransition(Recovered, Event.RecoveryWaitPeriodTimeout, Available);
// From Fencing.
FSM.addTransition(Fencing, Event.Disabled, Disabled);
FSM.addTransition(Fencing, Event.Ineligible, Ineligible);
FSM.addTransition(Fencing, Event.Fenced, Fenced);
// From Fenced: no Ineligible event is registered here; a passing health
// check is what moves the resource to Ineligible.
FSM.addTransition(Fenced, Event.Disabled, Disabled);
FSM.addTransition(Fenced, Event.HealthCheckPassed, Ineligible);
FSM.addTransition(Fenced, Event.HealthCheckFailed, Fenced);
}
}
}

View File

@ -0,0 +1,96 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.dc.DataCenter;
import com.cloud.org.Cluster;
import java.util.List;
/**
 * Operations for configuring, switching on/off and listing HA on individual
 * resources, on clusters and on zones.
 *
 * @since 4.11
 */
public interface HAConfigManager {

    /**
     * Sets up HA for one resource, given the resource type and the HA provider to use.
     *
     * @param resourceId   ID of the resource
     * @param resourceType type of the resource
     * @param haProvider   name of the HA provider
     * @return NOTE(review): the meaning of the flag is not stated on this declaration;
     *         presumably {@code true} on success like the enable/disable operations - confirm
     */
    boolean configureHA(Long resourceId, HAResource.ResourceType resourceType, String haProvider);

    /**
     * Turns HA on for the resource with the given ID and type.
     *
     * @param resourceId   ID of the resource
     * @param resourceType type of the resource
     * @return {@code true} when HA was enabled successfully
     */
    boolean enableHA(Long resourceId, HAResource.ResourceType resourceType);

    /**
     * Turns HA off for the resource with the given ID and type.
     *
     * @param resourceId   ID of the resource
     * @param resourceType type of the resource
     * @return {@code true} when HA was disabled successfully
     */
    boolean disableHA(Long resourceId, HAResource.ResourceType resourceType);

    /**
     * Turns HA on across a whole cluster.
     *
     * @param cluster the cluster
     * @return whether the operation succeeded
     */
    boolean enableHA(Cluster cluster);

    /**
     * Turns HA off across a whole cluster.
     *
     * @param cluster the cluster
     * @return whether the operation succeeded
     */
    boolean disableHA(Cluster cluster);

    /**
     * Turns HA on across a whole zone.
     *
     * @param zone the zone
     * @return whether the operation succeeded
     */
    boolean enableHA(DataCenter zone);

    /**
     * Turns HA off across a whole zone.
     *
     * @param zone the zone
     * @return whether the operation succeeded
     */
    boolean disableHA(DataCenter zone);

    /**
     * Lists HA configurations, narrowed by resource ID and/or resource type when those are supplied.
     *
     * @param resourceId   (optional) ID of the resource
     * @param resourceType (optional) type of the resource
     * @return the HA configurations found for the given filter
     */
    List<HAConfig> listHAResources(Long resourceId, HAResource.ResourceType resourceType);

    /**
     * Lists the HA providers available for a kind of resource.
     *
     * @param resourceType type of the resource
     * @param entityType   sub-type of the resource
     * @return names of the HA providers
     */
    List<String> listHAProviders(HAResource.ResourceType resourceType, HAResource.ResourceSubType entityType);
}

View File

@ -0,0 +1,37 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import org.apache.cloudstack.kernel.Partition;
/**
* A {@link Partition} that additionally exposes the zone and cluster it belongs to
* and the HA resource type it represents.
*/
public interface HAResource extends Partition {
/**
* Types of resource declared for HA handling.
*/
enum ResourceType {
Host,
VirtualMachine
}
/**
* Sub-types of a resource; HAConfigManager#listHAProviders takes one of these
* together with a ResourceType.
*/
enum ResourceSubType {
KVM,
Simulator,
Unknown
}
/**
* @return the ID of the data center (zone) of this resource
*/
long getDataCenterId();
/**
* @return the ID of the cluster of this resource; declared as a boxed Long
* (NOTE(review): presumably nullable when the resource has no cluster - confirm)
*/
Long getClusterId();
/**
* @return the HA resource type of this resource
*/
ResourceType resourceType();
}

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.kernel;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.api.InternalIdentity;
/**
* An identifiable entity (it carries both an internal ID and an external identity)
* that can report which kind of partition it is.
*/
public interface Partition extends InternalIdentity, Identity {
/**
* The kinds of partition an entity can declare itself to be.
*/
enum PartitionType {
Zone, Pod, Cluster, Host, VM
}
/**
* @return the partition type of this entity
*/
PartitionType partitionType();
}

View File

@ -44,4 +44,9 @@
<property name="typeClass" value="com.cloud.hypervisor.HypervisorGuru" />
</bean>
<!-- Ties the haProvidersRegistry bean to beans of type HAProvider.
     NOTE(review): presumably RegistryLifecycle adds every HAProvider bean to that registry at startup; confirm. -->
<bean class="org.apache.cloudstack.spring.lifecycle.registry.RegistryLifecycle">
<property name="registry" ref="haProvidersRegistry" />
<property name="typeClass" value="org.apache.cloudstack.ha.provider.HAProvider" />
</bean>
</beans>

View File

@ -307,6 +307,11 @@
<property name="excludeKey" value="data.motion.strategies.exclude" />
</bean>
<!-- Extension registry for HA providers; its default order configuration names KVMHAProvider. -->
<bean id="haProvidersRegistry"
class="org.apache.cloudstack.spring.lifecycle.registry.ExtensionRegistry">
<property name="orderConfigDefault" value="KVMHAProvider" />
</bean>
<bean id="outOfBandManagementDriversRegistry"
class="org.apache.cloudstack.spring.lifecycle.registry.ExtensionRegistry">
<property name="orderConfigDefault" value="IPMITOOL" />

View File

@ -0,0 +1,70 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.agent.api;
import com.cloud.agent.api.to.HostTO;
import com.cloud.agent.api.to.StorageFilerTO;
import com.cloud.host.Host;
import com.cloud.storage.StoragePool;
import com.cloud.storage.Volume;
import org.joda.time.DateTime;
import java.util.List;
/**
 * Command that, per its name, asks for a check of VM activity on a storage pool.
 * It carries the host, the storage pool, the UUIDs of the volumes of interest as a
 * single comma-separated string, and the suspect time expressed in whole seconds.
 */
public final class CheckVMActivityOnStoragePoolCommand extends Command {
    private HostTO host;
    private StorageFilerTO pool;
    private String volumeList;
    private long suspectTimeSeconds;

    /**
     * Builds the command from management-server side objects.
     *
     * @param host        host the check relates to; wrapped into a {@link HostTO}
     * @param pool        storage pool to look at; wrapped into a {@link StorageFilerTO}
     * @param volumeList  volumes whose UUIDs are sent; an empty list yields an empty string
     * @param suspectTime suspect time; stored as its millisecond value divided by 1000
     */
    public CheckVMActivityOnStoragePoolCommand(final Host host, final StoragePool pool, final List<Volume> volumeList, final DateTime suspectTime) {
        this.host = new HostTO(host);
        this.pool = new StorageFilerTO(pool);
        this.suspectTimeSeconds = suspectTime.getMillis() / 1000L;

        final StringBuilder stringBuilder = new StringBuilder();
        for (final Volume v : volumeList) {
            stringBuilder.append(v.getUuid()).append(",");
        }
        // Drop the trailing comma only if at least one volume was appended; an
        // unconditional deleteCharAt(length() - 1) throws on an empty builder.
        if (stringBuilder.length() > 0) {
            stringBuilder.setLength(stringBuilder.length() - 1);
        }
        this.volumeList = stringBuilder.toString();
    }

    /**
     * @return the volume UUIDs joined with commas, or an empty string when no volumes were given
     */
    public String getVolumeList() {
        return volumeList;
    }

    /**
     * @return the transfer object of the storage pool passed to the constructor
     */
    public StorageFilerTO getPool() {
        return pool;
    }

    /**
     * @return the transfer object of the host passed to the constructor
     */
    public HostTO getHost() {
        return host;
    }

    /**
     * @return the suspect time in whole seconds (milliseconds / 1000, truncated)
     */
    public long getSuspectTimeInSeconds() {
        return suspectTimeSeconds;
    }

    /**
     * @return always {@code false}
     *         (NOTE(review): presumably means the command need not be serialised with other commands - confirm in Command)
     */
    @Override
    public boolean executeInSequence() {
        return false;
    }
}

View File

@ -27,6 +27,8 @@ import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.kernel.Partition;
import org.junit.Test;
import com.cloud.agent.api.CheckOnHostCommand;
@ -38,6 +40,11 @@ import com.cloud.resource.ResourceState;
public class CheckOnHostCommandTest {
public Host host = new Host() {
// Test stub: the fake host reports itself as a Host partition.
@Override
public PartitionType partitionType() {
return PartitionType.Host;
}
@Override
public Status getState() {
return Status.Up;
@ -197,7 +204,12 @@ public class CheckOnHostCommandTest {
@Override
public Long getClusterId() {
return 3L;
};
}
// Test stub: the fake host reports the Host resource type.
@Override
public ResourceType resourceType() {
return ResourceType.Host;
}
@Override
public String getPublicIpAddress() {
@ -254,6 +266,11 @@ public class CheckOnHostCommandTest {
return false;
};
// Test stub: the fake host always reports that it is not disabled.
@Override
public boolean isDisabled() {
return false;
};
@Override
public ResourceState getResourceState() {
return ResourceState.Enabled;

View File

@ -137,6 +137,8 @@ public interface AgentManager {
void disconnectWithoutInvestigation(long hostId, Status.Event event);
void disconnectWithInvestigation(long hostId, Status.Event event);
public void pullAgentToMaintenance(long hostId);
public void pullAgentOutMaintenance(long hostId);

View File

@ -49,6 +49,7 @@ import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.managed.context.ManagedContextTimerTask;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.utils.security.SSLUtils;
import org.apache.cloudstack.ha.dao.HAConfigDao;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import org.apache.log4j.Logger;
@ -123,6 +124,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
ConfigDepot _configDepot;
@Inject
private OutOfBandManagementDao outOfBandManagementDao;
@Inject
private HAConfigDao haConfigDao;
protected ClusteredAgentManagerImpl() {
super();
@ -744,6 +747,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
final long lastPing = (System.currentTimeMillis() >> 10) - getTimeout();
_hostDao.markHostsAsDisconnected(vo.getMsid(), lastPing);
outOfBandManagementDao.expireServerOwnership(vo.getMsid());
haConfigDao.expireServerOwnership(vo.getMsid());
s_logger.info("Deleting entries from op_host_transfer table for Management server " + vo.getMsid());
cleanupTransferMap(vo.getMsid());
}

View File

@ -16,8 +16,16 @@
// under the License.
package org.apache.cloudstack.engine.datacenter.entity.api.db;
import java.util.Date;
import java.util.UUID;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Cluster;
import com.cloud.org.Grouping;
import com.cloud.org.Managed.ManagedState;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import javax.persistence.Column;
import javax.persistence.Entity;
@ -29,18 +37,8 @@ import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Cluster;
import com.cloud.org.Grouping;
import com.cloud.org.Managed.ManagedState;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import java.util.Date;
import java.util.UUID;
@Entity
@Table(name = "cluster")
@ -243,4 +241,9 @@ public class EngineClusterVO implements EngineCluster, Identity {
public State getState() {
return state;
}
/**
* @return PartitionType.Cluster, since this entity represents a cluster
*/
@Override
public PartitionType partitionType() {
return PartitionType.Cluster;
}
}

View File

@ -16,9 +16,14 @@
// under the License.
package org.apache.cloudstack.engine.datacenter.entity.api.db;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
import com.cloud.network.Network.Provider;
import com.cloud.org.Grouping;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import javax.persistence.Column;
import javax.persistence.Entity;
@ -32,16 +37,9 @@ import javax.persistence.TableGenerator;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import com.cloud.network.Network.Provider;
import com.cloud.org.Grouping;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
@Entity
@Table(name = "data_center")
@ -501,4 +499,9 @@ public class EngineDataCenterVO implements EngineDataCenter, Identity {
public void setIp6Dns2(String ip6Dns2) {
this.ip6Dns2 = ip6Dns2;
}
/**
* @return PartitionType.Zone, since this entity represents a data center (zone)
*/
@Override
public PartitionType partitionType() {
return PartitionType.Zone;
}
}

View File

@ -16,10 +16,16 @@
// under the License.
package org.apache.cloudstack.engine.datacenter.entity.api.db;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import javax.persistence.Column;
import javax.persistence.DiscriminatorColumn;
@ -36,18 +42,10 @@ import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;
import org.apache.cloudstack.api.Identity;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State;
import org.apache.cloudstack.engine.datacenter.entity.api.DataCenterResourceEntity.State.Event;
import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.UUID;
@Entity
@Table(name = "host")
@ -178,6 +176,11 @@ public class EngineHostVO implements EngineHost, Identity {
return clusterId;
}
/**
* @return ResourceType.Host, the HA resource type of a host entity
*/
@Override
public ResourceType resourceType() {
return ResourceType.Host;
}
public void setClusterId(Long clusterId) {
this.clusterId = clusterId;
}
@ -720,6 +723,11 @@ public class EngineHostVO implements EngineHost, Identity {
resourceState = state;
}
/**
 * Tells whether this host is disabled.
 *
 * @return true only when the resource state equals ResourceState.Disabled
 */
@Override
public boolean isDisabled() {
    final ResourceState current = getResourceState();
    return current == ResourceState.Disabled;
}
@Override
public boolean isInMaintenanceStates() {
return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance);
@ -758,4 +766,9 @@ public class EngineHostVO implements EngineHost, Identity {
public State getOrchestrationState() {
return orchestrationState;
}
/**
* @return PartitionType.Host, since this entity represents a host
*/
@Override
public PartitionType partitionType() {
return PartitionType.Host;
}
}

View File

@ -169,6 +169,8 @@
<bean id="guestOSCategoryDaoImpl" class="com.cloud.storage.dao.GuestOSCategoryDaoImpl" />
<bean id="guestOSDaoImpl" class="com.cloud.storage.dao.GuestOSDaoImpl" />
<bean id="guestOSHypervisorDaoImpl" class="com.cloud.storage.dao.GuestOSHypervisorDaoImpl" />
<!-- DAO for the ha_config table (HAConfigVO) of the host-HA framework -->
<bean id="haConfigDaoImpl" class="org.apache.cloudstack.ha.dao.HAConfigDaoImpl" />
<bean id="highAvailabilityDaoImpl" class="com.cloud.ha.dao.HighAvailabilityDaoImpl" />
<bean id="hostDaoImpl" class="com.cloud.host.dao.HostDaoImpl" />
<bean id="engineHostDetailsDaoImpl" class="org.apache.cloudstack.engine.datacenter.entity.api.db.dao.HostDetailsDaoImpl" />

View File

@ -16,8 +16,12 @@
// under the License.
package com.cloud.dc;
import java.util.Date;
import java.util.UUID;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Cluster;
import com.cloud.org.Grouping;
import com.cloud.org.Managed.ManagedState;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import javax.persistence.Column;
import javax.persistence.Entity;
@ -27,13 +31,8 @@ import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Cluster;
import com.cloud.org.Grouping;
import com.cloud.org.Managed.ManagedState;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import java.util.Date;
import java.util.UUID;
@Entity
@Table(name = "cluster")
@ -192,4 +191,9 @@ public class ClusterVO implements Cluster {
public void setUuid(String uuid) {
this.uuid = uuid;
}
/**
* @return PartitionType.Cluster, since this value object represents a cluster
*/
@Override
public PartitionType partitionType() {
return PartitionType.Cluster;
}
}

View File

@ -16,9 +16,10 @@
// under the License.
package com.cloud.dc;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
import com.cloud.network.Network.Provider;
import com.cloud.org.Grouping;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import javax.persistence.Column;
import javax.persistence.Entity;
@ -30,11 +31,9 @@ import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.TableGenerator;
import javax.persistence.Transient;
import com.cloud.network.Network.Provider;
import com.cloud.org.Grouping;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
@Entity
@Table(name = "data_center")
@ -454,4 +453,9 @@ public class DataCenterVO implements DataCenter {
public void setIp6Dns2(String ip6Dns2) {
this.ip6Dns2 = ip6Dns2;
}
/**
* @return PartitionType.Zone, since this value object represents a data center (zone)
*/
@Override
public PartitionType partitionType() {
return PartitionType.Zone;
}
}

View File

@ -16,11 +16,12 @@
// under the License.
package com.cloud.host;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import com.cloud.agent.api.VgpuTypesInfo;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import javax.persistence.Column;
import javax.persistence.DiscriminatorColumn;
@ -37,13 +38,11 @@ import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;
import com.cloud.agent.api.VgpuTypesInfo;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.resource.ResourceState;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.db.GenericDao;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
@Entity
@Table(name = "host")
@ -178,6 +177,11 @@ public class HostVO implements Host {
return clusterId;
}
/**
* @return ResourceType.Host, the HA resource type of a host
*/
@Override
public ResourceType resourceType() {
return ResourceType.Host;
}
public void setClusterId(Long clusterId) {
this.clusterId = clusterId;
}
@ -713,6 +717,11 @@ public class HostVO implements Host {
return (getResourceState() == ResourceState.Maintenance || getResourceState() == ResourceState.ErrorInMaintenance || getResourceState() == ResourceState.PrepareForMaintenance);
}
/**
 * Tells whether this host is disabled.
 *
 * @return true only when the resource state equals ResourceState.Disabled
 */
@Override
public boolean isDisabled() {
    final ResourceState current = getResourceState();
    return current == ResourceState.Disabled;
}
public long getUpdated() {
return updated;
}
@ -730,4 +739,9 @@ public class HostVO implements Host {
public void setUuid(String uuid) {
this.uuid = uuid;
}
/**
* @return PartitionType.Host, since this value object represents a host
*/
@Override
public PartitionType partitionType() {
return PartitionType.Host;
}
}

View File

@ -16,11 +16,14 @@
// under the License.
package com.cloud.vm;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.utils.db.Encrypt;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import com.cloud.utils.fsm.FiniteStateObject;
import com.cloud.vm.VirtualMachine.State;
import org.apache.commons.codec.binary.Base64;
import org.apache.log4j.Logger;
import javax.persistence.Column;
import javax.persistence.DiscriminatorColumn;
@ -36,16 +39,11 @@ import javax.persistence.TableGenerator;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import javax.persistence.Transient;
import org.apache.commons.codec.binary.Base64;
import org.apache.log4j.Logger;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.utils.db.Encrypt;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.db.StateMachine;
import com.cloud.utils.fsm.FiniteStateObject;
import com.cloud.vm.VirtualMachine.State;
import java.security.NoSuchAlgorithmException;
import java.security.SecureRandom;
import java.util.Date;
import java.util.Map;
import java.util.UUID;
@Entity
@Table(name = "vm_instance")
@ -566,4 +564,9 @@ public class VMInstanceVO implements VirtualMachine, FiniteStateObject<State, Vi
public void setPowerHostId(Long hostId) {
powerHostId = hostId;
}
/**
* @return PartitionType.VM, since this value object represents a VM instance
*/
@Override
public PartitionType partitionType() {
return PartitionType.VM;
}
}

View File

@ -550,4 +550,9 @@ public class VMEntityVO implements VirtualMachine, FiniteStateObject<State, Virt
public void setDisplay(boolean display) {
this.display = display;
}
/**
* @return PartitionType.VM, since this entity represents a virtual machine
*/
@Override
public PartitionType partitionType() {
return PartitionType.VM;
}
}

View File

@ -0,0 +1,142 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.utils.db.StateMachine;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import java.util.Date;
/**
 * Row of the {@code ha_config} table: the HA settings and current HA state of one
 * resource, identified by resource ID and resource type.
 */
@Entity
@Table(name = "ha_config")
public class HAConfigVO implements HAConfig {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id")
    private Long id;

    @Column(name = "resource_id", updatable = false, nullable = false)
    private long resourceId;

    @Column(name = "resource_type", nullable = false)
    @Enumerated(EnumType.STRING)
    private HAResource.ResourceType resourceType;

    @Column(name = "enabled")
    private boolean enabled;

    // Deliberately has no setter: the state is written by the DAO code only.
    @Enumerated(EnumType.STRING)
    @StateMachine(state = HAState.class, event = HAConfig.Event.class)
    @Column(name = "ha_state", updatable = true, nullable = false, length = 32)
    private HAState haState;

    @Column(name = "provider")
    private String haProvider;

    // Meant to change whenever the state changes. Deliberately has no setter:
    // the DAO code maintains it.
    @Column(name = "update_count", updatable = true, nullable = false)
    private long updateCount;

    @Column(name = "update_time", updatable = true)
    @Temporal(TemporalType.TIMESTAMP)
    private Date updateTime;

    @Column(name = "mgmt_server_id")
    private Long managementServerId;

    /** Creates an empty config; fields are filled in through the setters. */
    public HAConfigVO() {
    }

    /** @return the row ID */
    @Override
    public long getId() {
        return id;
    }

    /** @return the ID of the resource this config belongs to */
    public long getResourceId() {
        return resourceId;
    }

    public void setResourceId(long resourceId) {
        this.resourceId = resourceId;
    }

    /** @return the type of the resource this config belongs to */
    public HAResource.ResourceType getResourceType() {
        return resourceType;
    }

    public void setResourceType(HAResource.ResourceType resourceType) {
        this.resourceType = resourceType;
    }

    /** @return the value of the {@code enabled} column */
    public boolean isEnabled() {
        return enabled;
    }

    public void setEnabled(boolean enabled) {
        this.enabled = enabled;
    }

    /** @return the current HA state; same value as {@link #getState()} */
    public HAState getHaState() {
        return haState;
    }

    /** @return the current HA state; same value as {@link #getHaState()} */
    @Override
    public HAState getState() {
        return haState;
    }

    /** @return the name of the HA provider stored for this resource */
    public String getHaProvider() {
        return haProvider;
    }

    public void setHaProvider(String haProvider) {
        this.haProvider = haProvider;
    }

    /** @return the current update counter */
    public long getUpdateCount() {
        return updateCount;
    }

    /**
     * Bumps the in-memory update counter by one.
     *
     * @return the counter value after the increment
     */
    public long incrUpdateCount() {
        return ++updateCount;
    }

    /** @return the value of the {@code update_time} column */
    public Date getUpdateTime() {
        return updateTime;
    }

    /** @return the ID of the owning management server, or {@code null} when none is set */
    public Long getManagementServerId() {
        return managementServerId;
    }

    public void setManagementServerId(Long managementServerId) {
        this.managementServerId = managementServerId;
    }
}

View File

@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.dao;
import com.cloud.utils.db.GenericDao;
import com.cloud.utils.fsm.StateDao;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAConfigVO;
import org.apache.cloudstack.ha.HAResource;
import java.util.List;
/**
* Data access for HAConfigVO rows; also acts as the StateDao for HA state transitions
* (HAConfig.HAState driven by HAConfig.Event).
*/
public interface HAConfigDao extends GenericDao<HAConfigVO, Long>, StateDao<HAConfig.HAState, HAConfig.Event, HAConfig> {
/**
* Looks up the HA config of a single resource.
* @param resourceId the ID of the resource
* @param resourceType the type of the resource
* @return the HA config matching both values
* (NOTE(review): presumably null when no row matches - confirm against findOneBy)
*/
HAConfig findHAResource(long resourceId, HAResource.ResourceType resourceType);
/**
* Lists HA configs, filtered by resource ID and/or resource type when provided.
* @param resourceId (optional) the ID of the resource
* @param resourceType (optional) the type of the resource
* @return the matching HA configs
*/
List<HAConfig> listHAResource(final Long resourceId, final HAResource.ResourceType resourceType);
/**
* Releases the HA config ownership held by a management server.
* @param serverId the ID of the management server whose ownership is expired
*/
void expireServerOwnership(long serverId);
}

View File

@ -0,0 +1,149 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.dao;
import com.cloud.utils.DateUtil;
import com.cloud.utils.db.Attribute;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallbackNoReturn;
import com.cloud.utils.db.TransactionLegacy;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.db.UpdateBuilder;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAConfigVO;
import org.apache.cloudstack.ha.HAResource;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
@DB
@Component
public class HAConfigDaoImpl extends GenericDaoBase<HAConfigVO, Long> implements HAConfigDao {
private static final Logger LOG = Logger.getLogger(HAConfigDaoImpl.class);
private static final String EXPIRE_OWNERSHIP = "UPDATE ha_config set mgmt_server_id=NULL where mgmt_server_id=?";
private SearchBuilder<HAConfigVO> ResourceSearch;
private SearchBuilder<HAConfigVO> StateUpdateSearch;
private Attribute HAStateAttr;
private Attribute MsIdAttr;
private Attribute UpdateTimeAttr;
public HAConfigDaoImpl() {
super();
ResourceSearch = createSearchBuilder();
ResourceSearch.and("resourceId", ResourceSearch.entity().getResourceId(), SearchCriteria.Op.EQ);
ResourceSearch.and("resourceType", ResourceSearch.entity().getResourceType(), SearchCriteria.Op.EQ);
ResourceSearch.done();
StateUpdateSearch = createSearchBuilder();
StateUpdateSearch.and("id", StateUpdateSearch.entity().getId(), SearchCriteria.Op.EQ);
StateUpdateSearch.and("haState", StateUpdateSearch.entity().getHaState(), SearchCriteria.Op.EQ);
StateUpdateSearch.and("update", StateUpdateSearch.entity().getUpdateCount(), SearchCriteria.Op.EQ);
StateUpdateSearch.done();
HAStateAttr = _allAttributes.get("haState");
MsIdAttr = _allAttributes.get("managementServerId");
UpdateTimeAttr = _allAttributes.get("updateTime");
assert (HAStateAttr != null && MsIdAttr != null && UpdateTimeAttr != null) : "Couldn't find one of these attributes";
}
@Override
public boolean updateState(HAConfig.HAState currentState, HAConfig.Event event, HAConfig.HAState nextState, HAConfig vo, Object data) {
HAConfigVO haConfig = (HAConfigVO) vo;
if (haConfig == null) {
if (LOG.isTraceEnabled()) {
LOG.trace("Invalid ha config view object provided");
}
return false;
}
Long newManagementServerId = event.getServerId();
if (currentState == nextState && (haConfig.getManagementServerId() != null && haConfig.getManagementServerId().equals(newManagementServerId))) {
return false;
}
if (event == HAConfig.Event.Disabled) {
newManagementServerId = null;
}
SearchCriteria<HAConfigVO> sc = StateUpdateSearch.create();
sc.setParameters("id", haConfig.getId());
sc.setParameters("haState", currentState);
sc.setParameters("update", haConfig.getUpdateCount());
haConfig.incrUpdateCount();
UpdateBuilder ub = getUpdateBuilder(haConfig);
ub.set(haConfig, HAStateAttr, nextState);
ub.set(haConfig, UpdateTimeAttr, DateUtil.currentGMTTime());
ub.set(haConfig, MsIdAttr, newManagementServerId);
int result = update(ub, sc, null);
if (LOG.isTraceEnabled() && result <= 0) {
LOG.trace(String.format("Failed to update HA state from:%s to:%s due to event:%s for the ha_config id:%d", currentState, nextState, event, haConfig.getId()));
}
return result > 0;
}
/**
 * Finds the HA configuration of a single resource.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 * @return the result of {@code findOneBy} for the given id and type
 */
@Override
public HAConfig findHAResource(final long resourceId, final HAResource.ResourceType resourceType) {
    final SearchCriteria<HAConfigVO> criteria = ResourceSearch.create();
    criteria.setParameters("resourceType", resourceType);
    criteria.setParameters("resourceId", resourceId);
    final HAConfigVO match = findOneBy(criteria);
    return match;
}
/**
 * Lists HA configurations, optionally filtered by resource id and/or resource type.
 *
 * @param resourceId   resource id filter; only applied when non-null and greater than zero
 * @param resourceType resource type filter; only applied when non-null
 * @return a new list holding the rows returned by {@code listBy}
 */
@Override
public List<HAConfig> listHAResource(final Long resourceId, final HAResource.ResourceType resourceType) {
    final SearchCriteria<HAConfigVO> criteria = ResourceSearch.create();

    final boolean filterByResourceId = resourceId != null && resourceId > 0L;
    if (filterByResourceId) {
        criteria.setParameters("resourceId", resourceId);
    }
    if (resourceType != null) {
        criteria.setParameters("resourceType", resourceType);
    }

    final List<HAConfig> configs = new ArrayList<HAConfig>();
    configs.addAll(listBy(criteria));
    return configs;
}
/**
 * Runs the {@code EXPIRE_OWNERSHIP} statement for the given management server inside a transaction.
 *
 * On an SQL failure the current transaction is rolled back and a warning is logged;
 * the failure is not propagated to the caller.
 *
 * @param serverId id of the management server, bound as the statement's first parameter
 */
@Override
public void expireServerOwnership(final long serverId) {
    Transaction.execute(new TransactionCallbackNoReturn() {
        @Override
        public void doInTransactionWithoutResult(TransactionStatus status) {
            final TransactionLegacy txn = TransactionLegacy.currentTxn();
            try (final PreparedStatement pstmt = txn.prepareAutoCloseStatement(EXPIRE_OWNERSHIP)) {
                pstmt.setLong(1, serverId);
                pstmt.executeUpdate();
            } catch (SQLException e) {
                txn.rollback();
                // Pass the exception to the logger so the SQL error and stack trace are not lost.
                LOG.warn("Failed to expire HA ownership of management server id: " + serverId, e);
            }
        }
    });
}
}

View File

@ -30,5 +30,12 @@
<bean id="KVMInvestigator" class="com.cloud.ha.KVMInvestigator">
<property name="name" value="KVMInvestigator" />
</bean>
<bean id="KVMHostActivityChecker" class="org.apache.cloudstack.kvm.ha.KVMHostActivityChecker">
</bean>
<bean id="KVMHAProvider" class="org.apache.cloudstack.kvm.ha.KVMHAProvider">
<property name="name" value="KVMHAProvider" />
</bean>
</beans>

View File

@ -29,7 +29,7 @@ import com.cloud.hypervisor.Hypervisor;
import com.cloud.resource.ResourceManager;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.component.AdapterBase;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.log4j.Logger;
@ -40,17 +40,23 @@ import java.util.List;
public class KVMInvestigator extends AdapterBase implements Investigator {
private final static Logger s_logger = Logger.getLogger(KVMInvestigator.class);
@Inject
HostDao _hostDao;
private HostDao _hostDao;
@Inject
AgentManager _agentMgr;
private AgentManager _agentMgr;
@Inject
ResourceManager _resourceMgr;
private ResourceManager _resourceMgr;
@Inject
PrimaryDataStoreDao _storagePoolDao;
private PrimaryDataStoreDao _storagePoolDao;
@Inject
private HAManager haManager;
@Override
public boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) throws UnknownVM {
if (haManager.isHAEligible(host)) {
return haManager.isVMAliveOnHost(host);
}
Status status = isAgentAlive(host);
s_logger.debug("HA: HOST is ineligible legacy state " + status + " for host " + host.getId());
if (status == null) {
throw new UnknownVM();
}
@ -67,6 +73,10 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
return null;
}
if (haManager.isHAEligible(agent)) {
return haManager.getHostStatus(agent);
}
List<StoragePoolVO> clusterPools = _storagePoolDao.listPoolsByCluster(agent.getClusterId());
boolean hasNfs = false;
for (StoragePoolVO pool : clusterPools) {
@ -123,6 +133,7 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
hostStatus = Status.Down;
}
s_logger.debug("HA: HOST is ineligible legacy state " + hostStatus + " for host " + agent.getId());
return hostStatus;
}
}

View File

@ -179,4 +179,9 @@ public class KVMHABase {
return result;
}
/**
 * Base implementation of the heartbeat check; it performs no check and always returns {@code null}.
 *
 * KVMHAChecker and KVMHAVMActivityChecker override this method with real checks.
 * Callers that auto-unbox the result of this base version would hit a NullPointerException.
 *
 * @return always {@code null}
 */
public Boolean checkingHB() {
// TODO: placeholder stub left from code generation; no check is implemented here.
return null;
}
}

View File

@ -40,10 +40,10 @@ public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
* True means heartbeaing is on going, or we can't get it's status. False
* means heartbeating is stopped definitely
*/
private Boolean checkingHB() {
@Override
public Boolean checkingHB() {
List<Boolean> results = new ArrayList<Boolean>();
for (NfsStoragePool pool : _pools) {
Script cmd = new Script(s_heartBeatPath, _heartBeatCheckerTimeout, s_logger);
cmd.add("-i", pool._poolIp);
cmd.add("-p", pool._poolMountSourcePath);
@ -53,9 +53,9 @@ public class KVMHAChecker extends KVMHABase implements Callable<Boolean> {
cmd.add("-t", String.valueOf(_heartBeatUpdateFreq / 1000));
OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
String result = cmd.execute(parser);
s_logger.debug("pool: " + pool._poolIp);
s_logger.debug("reture: " + result);
s_logger.debug("parser: " + parser.getLine());
s_logger.debug("KVMHAChecker pool: " + pool._poolIp);
s_logger.debug("KVMHAChecker reture: " + result);
s_logger.debug("KVMHAChecker parser: " + parser.getLine());
if (result == null && parser.getLine().contains("> DEAD <")) {
s_logger.debug("read heartbeat failed: ");
results.add(false);

View File

@ -16,22 +16,20 @@
// under the License.
package com.cloud.hypervisor.kvm.resource;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import com.cloud.utils.script.Script;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger;
import org.libvirt.Connect;
import org.libvirt.LibvirtException;
import org.libvirt.StoragePool;
import org.libvirt.StoragePoolInfo.StoragePoolState;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import com.cloud.utils.script.Script;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
public class KVMHAMonitor extends KVMHABase implements Runnable {
private static final Logger s_logger = Logger.getLogger(KVMHAMonitor.class);
@ -73,6 +71,12 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
}
}
/**
 * Looks up a monitored NFS storage pool by its uuid.
 *
 * @param uuid key of the pool in the monitor's pool map
 * @return the mapped pool, or {@code null} when the map has no entry for {@code uuid}
 */
public NfsStoragePool getStoragePool(String uuid) {
    final NfsStoragePool pool;
    // Read while holding the lock on the pool map.
    synchronized (_storagePool) {
        pool = _storagePool.get(uuid);
    }
    return pool;
}
private class Monitor extends ManagedContextRunnable {
@Override

View File

@ -0,0 +1,70 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.hypervisor.kvm.resource;
import com.cloud.utils.script.OutputInterpreter;
import com.cloud.utils.script.Script;
import org.apache.log4j.Logger;
import org.joda.time.Duration;
import java.util.concurrent.Callable;
/**
 * Runs the VM activity check script against an NFS storage pool to decide whether
 * the VMs of a host still show disk activity.
 *
 * The check can be submitted to an executor: {@link #call()} delegates to {@link #checkingHB()}.
 */
public class KVMHAVMActivityChecker extends KVMHABase implements Callable<Boolean> {
    private static final Logger LOG = Logger.getLogger(KVMHAVMActivityChecker.class);

    final private NfsStoragePool nfsStoragePool;
    final private String hostIP;
    final private String volumeUuidList;
    final private String vmActivityCheckPath;
    final private Duration activityScriptTimeout = Duration.standardSeconds(3600L);
    final private long suspectTimeInSeconds;

    /**
     * @param pool                 NFS pool whose IP, source path and mount path are passed to the script
     * @param host                 IP of the host under investigation (script option {@code -h})
     * @param volumeUUIDListString volume list passed verbatim to the script (option {@code -u})
     * @param vmActivityCheckPath  path of the activity check script
     * @param suspectTime          suspect time in seconds, passed to the script as option {@code -d}
     */
    public KVMHAVMActivityChecker(final NfsStoragePool pool, final String host, final String volumeUUIDListString, String vmActivityCheckPath, final long suspectTime) {
        this.nfsStoragePool = pool;
        this.hostIP = host;
        this.volumeUuidList = volumeUUIDListString;
        this.vmActivityCheckPath = vmActivityCheckPath;
        this.suspectTimeInSeconds = suspectTime;
    }

    /**
     * Executes the activity check script and interprets its output.
     *
     * @return {@code false} only when the script ran without an error result and its
     *         output line contains {@code "DEAD"}; {@code true} in every other case,
     *         including when the script produced no output line
     */
    @Override
    public Boolean checkingHB() {
        // NOTE(review): the timeout is handed over as a number of seconds (3600); confirm that this
        // Script constructor interprets its long argument as seconds and not milliseconds.
        Script cmd = new Script(vmActivityCheckPath, activityScriptTimeout.getStandardSeconds(), LOG);
        cmd.add("-i", nfsStoragePool._poolIp);
        cmd.add("-p", nfsStoragePool._poolMountSourcePath);
        cmd.add("-m", nfsStoragePool._mountDestPath);
        cmd.add("-h", hostIP);
        cmd.add("-u", volumeUuidList);
        // Current time in whole seconds.
        cmd.add("-t", String.valueOf(System.currentTimeMillis() / 1000));
        cmd.add("-d", String.valueOf(suspectTimeInSeconds));

        OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser();
        String result = cmd.execute(parser);
        final String outputLine = parser.getLine();

        LOG.debug("KVMHAVMActivityChecker pool: " + nfsStoragePool._poolIp);
        LOG.debug("KVMHAVMActivityChecker result: " + result);
        LOG.debug("KVMHAVMActivityChecker parser: " + outputLine);

        // Guard against a missing output line so an empty script output cannot raise an NPE;
        // without an explicit DEAD marker the check reports activity, as before.
        final boolean reportedDead = result == null && outputLine != null && outputLine.contains("DEAD");
        return !reportedDead;
    }

    @Override
    public Boolean call() throws Exception {
        return checkingHB();
    }
}

View File

@ -201,6 +201,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
private String _resizeVolumePath;
private String _createTmplPath;
private String _heartBeatPath;
private String _vmActivityCheckPath;
private String _securityGroupPath;
private String _ovsPvlanDhcpHostPath;
private String _ovsPvlanVmPath;
@ -447,6 +448,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
return _guestBridgeName;
}
/**
 * @return the path stored in {@code _vmActivityCheckPath}, which configuration sets to the
 *         location found for the {@code kvmvmactivity.sh} script
 */
public String getVmActivityCheckPath() {
return _vmActivityCheckPath;
}
public String getOvsPvlanDhcpHostPath() {
return _ovsPvlanDhcpHostPath;
}
@ -687,6 +692,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
throw new ConfigurationException("Unable to find the resizevolume.sh");
}
_vmActivityCheckPath = Script.findScript(kvmScriptsDir, "kvmvmactivity.sh");
if (_vmActivityCheckPath == null) {
throw new ConfigurationException("Unable to find kvmvmactivity.sh");
}
_createTmplPath = Script.findScript(storageScriptsDir, "createtmplt.sh");
if (_createTmplPath == null) {
throw new ConfigurationException("Unable to find the createtmplt.sh");

View File

@ -53,14 +53,14 @@ public final class LibvirtCheckOnHostCommandWrapper extends CommandWrapper<Check
try {
final Boolean result = future.get();
if (result) {
return new Answer(command, false, "Heart is still beating...");
return new Answer(command, false, "Heart is beating...");
} else {
return new Answer(command);
}
} catch (final InterruptedException e) {
return new Answer(command, false, "can't get status of host:");
return new Answer(command, false, "CheckOnHostCommand: can't get status of host: InterruptedException");
} catch (final ExecutionException e) {
return new Answer(command, false, "can't get status of host:");
return new Answer(command, false, "CheckOnHostCommand: can't get status of host: ExecutionException");
}
}
}

View File

@ -0,0 +1,65 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.hypervisor.kvm.resource.wrapper;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand;
import com.cloud.agent.api.to.StorageFilerTO;
import com.cloud.hypervisor.kvm.resource.KVMHABase.NfsStoragePool;
import com.cloud.hypervisor.kvm.resource.KVMHAMonitor;
import com.cloud.hypervisor.kvm.resource.KVMHAVMActivityChecker;
import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource;
import com.cloud.resource.CommandWrapper;
import com.cloud.resource.ResourceWrapper;
import com.cloud.storage.Storage;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
/**
 * Handles {@link CheckVMActivityOnStoragePoolCommand} on a KVM host by running a
 * {@link KVMHAVMActivityChecker} against the given NFS storage pool.
 *
 * The answer's result is {@code true} only when the checker reports no activity;
 * detected activity, failures and unsupported storage types all produce a
 * {@code false} result with an explanatory detail message.
 */
@ResourceWrapper(handles = CheckVMActivityOnStoragePoolCommand.class)
public final class LibvirtCheckVMActivityOnStoragePoolCommandWrapper extends CommandWrapper<CheckVMActivityOnStoragePoolCommand, Answer, LibvirtComputingResource> {

    @Override
    public Answer execute(final CheckVMActivityOnStoragePoolCommand command, final LibvirtComputingResource libvirtComputingResource) {
        final StorageFilerTO pool = command.getPool();
        // Only NFS pools are supported; bail out before allocating any resources.
        if (Storage.StoragePoolType.NetworkFilesystem != pool.getType()) {
            return new Answer(command, false, "Unsupported Storage");
        }

        final KVMHAMonitor monitor = libvirtComputingResource.getMonitor();
        final NfsStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
        final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost().getPrivateNetwork().getIp(), command.getVolumeList(),
                libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());

        final ExecutorService executors = Executors.newSingleThreadExecutor();
        try {
            final Future<Boolean> future = executors.submit(ha);
            final Boolean result = future.get();
            if (result) {
                return new Answer(command, false, "VMHA disk activity detected ...");
            } else {
                return new Answer(command);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            return new Answer(command, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: InterruptedException");
        } catch (ExecutionException e) {
            return new Answer(command, false, "CheckVMActivityOnStoragePoolCommand: can't get status of host: ExecutionException");
        } finally {
            // Release the worker thread; otherwise every command leaves an idle executor behind.
            executors.shutdown();
        }
    }
}

View File

@ -0,0 +1,56 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.kvm.ha;
import org.apache.cloudstack.framework.config.ConfigKey;
/**
 * Holder for the configuration keys of the KVM host-HA provider.
 * All keys are declared in the "Advanced" category with cluster scope.
 */
public class KVMHAConfig {
// Health check timeout in seconds (default "10").
public static final ConfigKey<Long> KvmHAHealthCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.health.check.timeout", "10",
"The maximum length of time, in seconds, expected for an health check to complete.", true, ConfigKey.Scope.Cluster);
// Activity check timeout in seconds (default "60").
public static final ConfigKey<Long> KvmHAActivityCheckTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.timeout", "60",
"The maximum length of time, in seconds, expected for an activity check to complete.", true, ConfigKey.Scope.Cluster);
// Interval between activity checks in seconds (default "60").
public static final ConfigKey<Long> KvmHAActivityCheckInterval = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.interval", "60",
"The interval, in seconds, between activity checks.", true, ConfigKey.Scope.Cluster);
// Number of activity check attempts before a recover/degrade decision (default "10").
public static final ConfigKey<Long> KvmHAActivityCheckMaxAttempts = new ConfigKey<>("Advanced", Long.class, "kvm.ha.activity.check.max.attempts", "10",
"The maximum number of activity check attempts to perform before deciding to recover or degrade a resource.", true, ConfigKey.Scope.Cluster);
// Failure ratio threshold used together with the maximum attempts (default "0.7").
// Note: the Java constant is named "...FailureThreshold" while the stored key ends in "failure.ratio".
public static final ConfigKey<Double> KvmHAActivityCheckFailureThreshold = new ConfigKey<>("Advanced", Double.class, "kvm.ha.activity.check.failure.ratio", "0.7",
"The activity check failure threshold ratio. This is used with the activity check maximum attempts for deciding to recover or degrade a resource. For most environments, please keep this value above 0.5.",
true, ConfigKey.Scope.Cluster);
// Maximum time in seconds a resource may stay degraded (default "300").
public static final ConfigKey<Long> KvmHADegradedMaxPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.degraded.max.period", "300",
"The maximum length of time, in seconds, a resource can be in degraded state where only health checks are performed.", true, ConfigKey.Scope.Cluster);
// Recovery operation timeout in seconds (default "60").
public static final ConfigKey<Long> KvmHARecoverTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.timeout", "60",
"The maximum length of time, in seconds, expected for a recovery operation to complete.", true, ConfigKey.Scope.Cluster);
// Time in seconds to wait for a resource to recover (default "600").
public static final ConfigKey<Long> KvmHARecoverWaitPeriod = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.wait.period", "600",
"The maximum length of time, in seconds, to wait for a resource to recover.", true, ConfigKey.Scope.Cluster);
// Maximum recovery attempts before fencing (default "1").
public static final ConfigKey<Long> KvmHARecoverAttemptThreshold = new ConfigKey<>("Advanced", Long.class, "kvm.ha.recover.failure.threshold", "1",
"The maximum recovery attempts to be made for a resource, after which the resource is fenced. The recovery counter resets when a health check passes for a resource.",
true, ConfigKey.Scope.Cluster);
// Fence operation timeout in seconds (default "60").
public static final ConfigKey<Long> KvmHAFenceTimeout = new ConfigKey<>("Advanced", Long.class, "kvm.ha.fence.timeout", "60",
"The maximum length of time, in seconds, expected for a fence operation to complete.", true, ConfigKey.Scope.Cluster);
}

View File

@ -0,0 +1,157 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cloudstack.kvm.ha;
import com.cloud.host.Host;
import com.cloud.hypervisor.Hypervisor;
import org.apache.cloudstack.api.response.OutOfBandManagementResponse;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAFenceException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider;
import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement.PowerOperation;
import org.apache.cloudstack.outofbandmanagement.OutOfBandManagementService;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import javax.inject.Inject;
import java.security.InvalidParameterException;
/**
 * Host-HA provider for KVM (and LXC) hosts.
 *
 * Health and activity checks are delegated to {@link KVMHostActivityChecker}; recovery and
 * fencing are performed through the out-of-band management service using the
 * {@code RESET} and {@code OFF} power operations respectively.
 */
public final class KVMHAProvider extends HAAbstractHostProvider implements HAProvider<Host>, Configurable {
    private final static Logger LOG = Logger.getLogger(KVMHAProvider.class);

    @Inject
    protected KVMHostActivityChecker hostActivityChecker;
    @Inject
    protected OutOfBandManagementService outOfBandManagementService;

    /**
     * A host is eligible when out-of-band management is enabled for it, it is neither in
     * maintenance mode nor disabled, it has at least one neighbour reported by the activity
     * checker, and its hypervisor type is KVM or LXC.
     */
    @Override
    public boolean isEligible(final Host host) {
        if (!outOfBandManagementService.isOutOfBandManagementEnabled(host)) {
            return false;
        }
        return !isInMaintenanceMode(host) && !isDisabled(host) &&
                hostActivityChecker.getNeighbors(host).length > 0 &&
                (Hypervisor.HypervisorType.KVM.equals(host.getHypervisorType()) ||
                        Hypervisor.HypervisorType.LXC.equals(host.getHypervisorType()));
    }

    @Override
    public boolean isHealthy(final Host r) throws HACheckerException {
        return hostActivityChecker.isHealthy(r);
    }

    @Override
    public boolean hasActivity(final Host r, final DateTime suspectTime) throws HACheckerException {
        return hostActivityChecker.isActive(r, suspectTime);
    }

    /**
     * Tries to recover the host by issuing an out-of-band {@code RESET}.
     *
     * @return the success flag of the power operation, or {@code false} when out-of-band
     *         management is not enabled for the host
     * @throws HARecoveryException when the power operation throws
     */
    @Override
    public boolean recover(Host r) throws HARecoveryException {
        try {
            if (outOfBandManagementService.isOutOfBandManagementEnabled(r)) {
                final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.RESET, null);
                return resp.getSuccess();
            } else {
                // No operation was attempted: report the real reason instead of a failed operation.
                LOG.warn("OOBM service is not configured or enabled for this host " + r.getName() + ", cannot recover it");
                return false;
            }
        } catch (Exception e) {
            LOG.warn("OOBM recover operation failed for the host " + r.getName() + " error is " + e.getMessage(), e);
            throw new HARecoveryException("OOBM recover operation failed for the host " + r.getName(), e);
        }
    }

    /**
     * Fences the host by issuing an out-of-band power {@code OFF}.
     *
     * @return the success flag of the power operation, or {@code false} when out-of-band
     *         management is not enabled for the host
     * @throws HAFenceException when the power operation throws
     */
    @Override
    public boolean fence(Host r) throws HAFenceException {
        try {
            if (outOfBandManagementService.isOutOfBandManagementEnabled(r)) {
                final OutOfBandManagementResponse resp = outOfBandManagementService.executePowerOperation(r, PowerOperation.OFF, null);
                return resp.getSuccess();
            } else {
                // No operation was attempted: report the real reason instead of a failed operation.
                LOG.warn("OOBM service is not configured or enabled for this host " + r.getName() + ", cannot fence it");
                return false;
            }
        } catch (Exception e) {
            LOG.warn("OOBM fence operation failed for this host " + r.getName() + " error is " + e.getMessage(), e);
            throw new HAFenceException("OOBM fence operation failed for this host " + r.getName(), e);
        }
    }

    @Override
    public HAResource.ResourceSubType resourceSubType() {
        return HAResource.ResourceSubType.KVM;
    }

    /**
     * Resolves a generic HA provider setting to the matching KVM config key, evaluated
     * for the cluster of the given host.
     *
     * @throws InvalidParameterException for a setting this provider does not map
     */
    @Override
    public Object getConfigValue(final HAProviderConfig name, final Host host) {
        final Long clusterId = host.getClusterId();
        switch (name) {
            case HealthCheckTimeout:
                return KVMHAConfig.KvmHAHealthCheckTimeout.valueIn(clusterId);
            case ActivityCheckTimeout:
                return KVMHAConfig.KvmHAActivityCheckTimeout.valueIn(clusterId);
            case MaxActivityCheckInterval:
                return KVMHAConfig.KvmHAActivityCheckInterval.valueIn(clusterId);
            case MaxActivityChecks:
                return KVMHAConfig.KvmHAActivityCheckMaxAttempts.valueIn(clusterId);
            case ActivityCheckFailureRatio:
                return KVMHAConfig.KvmHAActivityCheckFailureThreshold.valueIn(clusterId);
            case RecoveryWaitTimeout:
                return KVMHAConfig.KvmHARecoverWaitPeriod.valueIn(clusterId);
            case RecoveryTimeout:
                return KVMHAConfig.KvmHARecoverTimeout.valueIn(clusterId);
            case FenceTimeout:
                return KVMHAConfig.KvmHAFenceTimeout.valueIn(clusterId);
            case MaxRecoveryAttempts:
                return KVMHAConfig.KvmHARecoverAttemptThreshold.valueIn(clusterId);
            case MaxDegradedWaitTimeout:
                return KVMHAConfig.KvmHADegradedMaxPeriod.valueIn(clusterId);
            default:
                throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString());
        }
    }

    @Override
    public String getConfigComponentName() {
        return KVMHAConfig.class.getSimpleName();
    }

    @Override
    public ConfigKey<?>[] getConfigKeys() {
        return new ConfigKey<?>[] {
            KVMHAConfig.KvmHAHealthCheckTimeout,
            KVMHAConfig.KvmHAActivityCheckTimeout,
            KVMHAConfig.KvmHARecoverTimeout,
            KVMHAConfig.KvmHAFenceTimeout,
            KVMHAConfig.KvmHAActivityCheckInterval,
            KVMHAConfig.KvmHAActivityCheckMaxAttempts,
            KVMHAConfig.KvmHAActivityCheckFailureThreshold,
            KVMHAConfig.KvmHADegradedMaxPeriod,
            KVMHAConfig.KvmHARecoverWaitPeriod,
            KVMHAConfig.KvmHARecoverAttemptThreshold
        };
    }
}

View File

@ -0,0 +1,205 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.kvm.ha;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckOnHostCommand;
import com.cloud.agent.api.CheckVMActivityOnStoragePoolCommand;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.resource.ResourceManager;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StoragePool;
import com.cloud.storage.Volume;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.utils.component.AdapterBase;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.dao.VMInstanceDao;
import org.apache.cloudstack.ha.provider.ActivityCheckerInterface;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HealthCheckerInterface;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.commons.lang.ArrayUtils;
import org.apache.log4j.Logger;
import javax.inject.Inject;
import java.util.ArrayList;
import org.joda.time.DateTime;
import java.util.HashMap;
import java.util.List;
/**
 * Health and activity checker for KVM/LXC hosts used by the KVM host-HA provider.
 *
 * Health is determined by sending a {@link CheckOnHostCommand} to the host itself;
 * activity is determined by asking neighbouring hosts to run a
 * {@link CheckVMActivityOnStoragePoolCommand} for every storage pool that holds
 * volumes of VMs on the investigated host.
 */
public class KVMHostActivityChecker extends AdapterBase implements ActivityCheckerInterface<Host>, HealthCheckerInterface<Host> {
    private final static Logger LOG = Logger.getLogger(KVMHostActivityChecker.class);

    @Inject
    private VolumeDao volumeDao;
    @Inject
    private VMInstanceDao vmInstanceDao;
    @Inject
    private AgentManager agentMgr;
    @Inject
    private PrimaryDataStoreDao storagePool;
    @Inject
    private StorageManager storageManager;
    @Inject
    private ResourceManager resourceManager;

    /**
     * @return the VM activity status of the host since {@code suspectTime}
     * @throws HACheckerException wrapping any failure raised while performing the check
     */
    @Override
    public boolean isActive(Host r, DateTime suspectTime) throws HACheckerException {
        try {
            return isVMActivityOnHost(r, suspectTime);
        } catch (StorageUnavailableException e) {
            throw new HACheckerException("Storage is unavailable to do the check, mostly host is not reachable ", e);
        } catch (Exception e) {
            throw new HACheckerException("Operation timed out, mostly host is not reachable ", e);
        }
    }

    @Override
    public boolean isHealthy(Host r) {
        return isAgentActive(r);
    }

    /** Tells whether the hypervisor type is one this checker handles (KVM or LXC). */
    private static boolean isKvmOrLxc(final Hypervisor.HypervisorType type) {
        return type == Hypervisor.HypervisorType.KVM || type == Hypervisor.HypervisorType.LXC;
    }

    /**
     * Checks the host directly and, when it is not reported as up, also queries its
     * neighbours in the cluster.
     *
     * Only a direct answer with a {@code false} result (mapped to {@code Status.Up}) makes
     * this method return {@code true}; the neighbour results refine the status that is
     * traced but cannot turn it into {@code Status.Up}.
     */
    private boolean isAgentActive(Host agent) {
        if (!isKvmOrLxc(agent.getHypervisorType())) {
            throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType());
        }
        Status hostStatus = Status.Unknown;
        Status neighbourStatus = Status.Unknown;
        final CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
        try {
            Answer answer = agentMgr.easySend(agent.getId(), cmd);
            if (answer != null) {
                // A successful answer means the host was found down.
                hostStatus = answer.getResult() ? Status.Down : Status.Up;
                if (hostStatus == Status.Up) {
                    return true;
                }
            } else {
                hostStatus = Status.Disconnected;
            }
        } catch (Exception e) {
            // Keep the cause in the log instead of dropping it.
            LOG.warn("Failed to send command to host: " + agent.getId(), e);
        }

        List<HostVO> neighbors = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
        for (HostVO neighbor : neighbors) {
            if (neighbor.getId() == agent.getId() || !isKvmOrLxc(neighbor.getHypervisorType())) {
                continue;
            }
            if (LOG.isTraceEnabled()) {
                LOG.trace("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId());
            }
            try {
                Answer answer = agentMgr.easySend(neighbor.getId(), cmd);
                if (answer != null) {
                    neighbourStatus = answer.getResult() ? Status.Down : Status.Up;
                    if (LOG.isTraceEnabled()) {
                        LOG.trace("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId());
                    }
                    if (neighbourStatus == Status.Up) {
                        break;
                    }
                }
            } catch (Exception e) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Failed to send command to host: " + neighbor.getId());
                }
            }
        }
        if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
            hostStatus = Status.Disconnected;
        }
        if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
            hostStatus = Status.Down;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Resource state = " + hostStatus.name());
        }
        return hostStatus == Status.Up;
    }

    /**
     * Asks the neighbours of the host to check VM disk activity on each storage pool used
     * by the host's VMs.
     *
     * The status starts as {@code true} and is overwritten for every pool, so the returned
     * value reflects the last pool iterated.
     * NOTE(review): confirm whether the per-pool results should instead be combined.
     *
     * @throws IllegalStateException when the host is not KVM/LXC or a pool check returns no answer
     */
    private boolean isVMActivityOnHost(Host agent, DateTime suspectTime) throws StorageUnavailableException {
        if (!isKvmOrLxc(agent.getHypervisorType())) {
            throw new IllegalStateException("Calling KVM investigator for non KVM Host of type " + agent.getHypervisorType());
        }
        boolean activityStatus = true;
        HashMap<StoragePool, List<Volume>> poolVolMap = getVolumeUuidOnHost(agent);
        for (StoragePool pool : poolVolMap.keySet()) {
            //for each storage pool find activity
            List<Volume> volume_list = poolVolMap.get(pool);
            final CheckVMActivityOnStoragePoolCommand cmd = new CheckVMActivityOnStoragePoolCommand(agent, pool, volume_list, suspectTime);
            //send the command to appropriate storage pool
            Answer answer = storageManager.sendToPool(pool, getNeighbors(agent), cmd);
            if (answer != null) {
                // A successful answer means no activity was detected.
                activityStatus = !answer.getResult();
            } else {
                throw new IllegalStateException("Did not get a valid response for VM activity check for host " + agent.getId());
            }
        }
        if (LOG.isDebugEnabled()) {
            LOG.debug("Resource active = " + activityStatus);
        }
        return activityStatus;
    }

    /**
     * Groups the volumes of all VMs on the host by the storage pool that holds them.
     * Volumes without a pool id, or whose pool cannot be found, are skipped so the map
     * never contains a {@code null} key.
     */
    private HashMap<StoragePool, List<Volume>> getVolumeUuidOnHost(Host agent) {
        List<VMInstanceVO> vm_list = vmInstanceDao.listByHostId(agent.getId());
        List<VolumeVO> volume_list = new ArrayList<VolumeVO>();
        for (VirtualMachine vm : vm_list) {
            List<VolumeVO> vm_volume_list = volumeDao.findByInstance(vm.getId());
            volume_list.addAll(vm_volume_list);
        }

        HashMap<StoragePool, List<Volume>> poolVolMap = new HashMap<StoragePool, List<Volume>>();
        for (Volume vol : volume_list) {
            final Long poolId = vol.getPoolId();
            final StoragePool sp = poolId == null ? null : storagePool.findById(poolId);
            if (sp == null) {
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Skipping volume " + vol.getId() + " for VM activity check as its storage pool could not be determined");
                }
                continue;
            }
            List<Volume> poolVolumes = poolVolMap.get(sp);
            if (poolVolumes == null) {
                poolVolumes = new ArrayList<Volume>();
                poolVolMap.put(sp, poolVolumes);
            }
            poolVolumes.add(vol);
        }
        return poolVolMap;
    }

    /**
     * @return ids of the other KVM/LXC hosts in the same cluster that are listed with status Up
     */
    public long[] getNeighbors(Host agent) {
        List<Long> neighbors = new ArrayList<Long>();
        List<HostVO> cluster_hosts = resourceManager.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
        for (HostVO host : cluster_hosts) {
            if (host.getId() == agent.getId() || !isKvmOrLxc(host.getHypervisorType())) {
                continue;
            }
            neighbors.add(host.getId());
        }
        return ArrayUtils.toPrimitive(neighbors.toArray(new Long[neighbors.size()]));
    }
}

View File

@ -0,0 +1,84 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.cloudstack.kvm.ha;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.host.Host;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mockito.Mock;
import org.mockito.MockitoAnnotations;
import org.mockito.runners.MockitoJUnitRunner;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.when;
import org.joda.time.DateTime;
/**
 * Unit tests checking that {@link KVMHAProvider} reports the health and
 * activity results produced by its (mocked) {@link KVMHostActivityChecker}.
 */
@RunWith(MockitoJUnitRunner.class)
public class KVMHostHATest {

    @Mock
    private Host host;

    @Mock
    private KVMHostActivityChecker kvmHostActivityChecker;

    private KVMHAProvider kvmHAProvider;

    /** Builds a fresh provider wired to the mocked activity checker before each test. */
    @Before
    public void setup() {
        MockitoAnnotations.initMocks(this);
        kvmHAProvider = new KVMHAProvider();
        kvmHAProvider.hostActivityChecker = kvmHostActivityChecker;
    }

    /** A KVM host the checker reports as healthy must be healthy for the provider. */
    @Test
    public void testHostActivityForHealthyHost() throws HACheckerException, StorageUnavailableException {
        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
        when(kvmHostActivityChecker.isHealthy(host)).thenReturn(true);

        final boolean healthy = kvmHAProvider.isHealthy(host);

        assertTrue(healthy);
    }

    /** A KVM host the checker reports as unhealthy must be unhealthy for the provider. */
    @Test
    public void testHostActivityForUnHealthyHost() throws HACheckerException, StorageUnavailableException {
        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
        when(kvmHostActivityChecker.isHealthy(host)).thenReturn(false);

        final boolean healthy = kvmHAProvider.isHealthy(host);

        assertFalse(healthy);
    }

    /** Activity reported by the checker for a given instant must be reported by the provider. */
    @Test
    public void testHostActivityForActiveHost() throws HACheckerException, StorageUnavailableException {
        final DateTime checkTime = new DateTime();
        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
        when(kvmHostActivityChecker.isActive(host, checkTime)).thenReturn(true);

        final boolean active = kvmHAProvider.hasActivity(host, checkTime);

        assertTrue(active);
    }

    /** Absence of activity reported by the checker must be reported by the provider. */
    @Test
    public void testHostActivityForDownHost() throws HACheckerException, StorageUnavailableException {
        final DateTime checkTime = new DateTime();
        when(host.getHypervisorType()).thenReturn(HypervisorType.KVM);
        when(kvmHostActivityChecker.isActive(host, checkTime)).thenReturn(false);

        final boolean active = kvmHAProvider.hasActivity(host, checkTime);

        assertFalse(active);
    }
}

View File

@ -63,5 +63,10 @@
<artifactId>cloud-engine-storage-snapshot</artifactId>
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>${cs.guava.version}</version>
</dependency>
</dependencies>
</project>

View File

@ -36,4 +36,8 @@
<bean id="SimulatorFencer" class="com.cloud.ha.SimulatorFencer">
<property name="name" value="Simulator Fencer"/>
</bean>
<bean id="SimulatorHAProvider" class="org.apache.cloudstack.ha.SimulatorHAProvider">
<property name="name" value="SimulatorHAProvider" />
</bean>
</beans>

View File

@ -112,6 +112,8 @@ import com.cloud.agent.api.storage.ListVolumeCommand;
import com.cloud.agent.api.storage.PrimaryStorageDownloadCommand;
import com.cloud.api.commands.CleanupSimulatorMockCmd;
import com.cloud.api.commands.ConfigureSimulatorCmd;
import com.cloud.api.commands.ConfigureSimulatorHAProviderState;
import com.cloud.api.commands.ListSimulatorHAStateTransitions;
import com.cloud.api.commands.QuerySimulatorMockCmd;
import com.cloud.resource.SimulatorStorageProcessor;
import com.cloud.serializer.GsonHelper;
@ -193,6 +195,8 @@ public class SimulatorManagerImpl extends ManagerBase implements SimulatorManage
cmdList.add(ConfigureSimulatorCmd.class);
cmdList.add(QuerySimulatorMockCmd.class);
cmdList.add(CleanupSimulatorMockCmd.class);
cmdList.add(ConfigureSimulatorHAProviderState.class);
cmdList.add(ListSimulatorHAStateTransitions.class);
return cmdList;
}

View File

@ -0,0 +1,120 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.api.commands;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.api.response.SuccessResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.SimulatorHAProvider;
import org.apache.cloudstack.ha.SimulatorHAState;
import javax.inject.Inject;
/**
 * Admin-only API command that stores a {@link SimulatorHAState} (health,
 * activity, recoverability and fence-ability flags) for a host in the
 * simulator HA provider.
 */
@APICommand(name = ConfigureSimulatorHAProviderState.APINAME,
        description="configures simulator HA provider state for a host for probing and testing",
        responseObject=SuccessResponse.class,
        since = "4.11", authorized = {RoleType.Admin})
public final class ConfigureSimulatorHAProviderState extends BaseCmd {
    public static final String APINAME = "configureSimulatorHAProviderState";

    @Inject
    private HAManager haManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class,
            description = "ID of the host", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long hostId;

    @Parameter(name = ApiConstants.HEALTH, type = CommandType.BOOLEAN,
            description = "Set true if haprovider for simulator host should be healthy",
            required = true)
    private Boolean healthy;

    @Parameter(name = ApiConstants.ACTIVITY, type = CommandType.BOOLEAN,
            description = "Set true if haprovider for simulator host should have activity",
            required = true)
    private Boolean activity;

    @Parameter(name = ApiConstants.RECOVER, type = CommandType.BOOLEAN,
            description = "Set true if haprovider for simulator host should be recoverable",
            required = true)
    private Boolean recovery;

    @Parameter(name = ApiConstants.FENCE, type = CommandType.BOOLEAN,
            description = "Set true if haprovider for simulator host should be fence-able",
            required = true)
    private Boolean fenceable;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the id of the host given in the request
     */
    public Long getHostId() {
        return hostId;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /**
     * Looks up the host, builds a {@link SimulatorHAState} from the request
     * flags and hands it to the simulator HA provider.
     *
     * The response reports success only when the simulator HA provider could
     * be obtained from the HA manager; when it is not available nothing is
     * stored and success is false.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when no host exists for the given id
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Host host = _resourceService.getHost(getHostId());
        if (host == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
        }

        final SimulatorHAState haState = new SimulatorHAState(healthy, activity, recovery, fenceable);
        // The provider is looked up by the lower-cased simple class name.
        final SimulatorHAProvider simulatorHAProvider = (SimulatorHAProvider) haManager.getHAProvider(SimulatorHAProvider.class.getSimpleName().toLowerCase());
        if (simulatorHAProvider != null) {
            simulatorHAProvider.setHAStateForHost(host.getId(), haState);
        }

        final SuccessResponse response = new SuccessResponse();
        response.setSuccess(simulatorHAProvider != null);
        response.setResponseName(getCommandName());
        response.setObjectName("simulatorhaprovider");
        setResponseObject(response);
    }

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the id of the account making the call
     */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }
}

View File

@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.api.commands;
import com.cloud.api.response.SimulatorHAStateResponse;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.host.Host;
import org.apache.cloudstack.acl.RoleType;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiArgValidator;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.BaseListCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.SimulatorHAProvider;
import javax.inject.Inject;
import java.util.ArrayList;
import java.util.List;
/**
 * Admin-only API command that returns the HA state transitions the simulator
 * HA provider has recorded for a host.
 */
@APICommand(name = ListSimulatorHAStateTransitions.APINAME,
        description="list recent simulator HA state transitions for a host for probing and testing",
        responseObject=SimulatorHAStateResponse.class,
        since = "4.11", authorized = {RoleType.Admin})
public final class ListSimulatorHAStateTransitions extends BaseListCmd {
    public static final String APINAME = "listSimulatorHAStateTransitions";

    @Inject
    private HAManager haManager;

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.HOST_ID, type = BaseCmd.CommandType.UUID, entityType = HostResponse.class,
            description = "List by host ID", required = true, validations = {ApiArgValidator.PositiveNumber})
    private Long hostId;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the id of the host given in the request
     */
    public Long getHostId() {
        return hostId;
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    /**
     * Resolves the host and responds with the transitions recorded for it.
     * An empty list is returned when the simulator HA provider cannot be
     * obtained from the HA manager.
     *
     * @throws ServerApiException with {@code PARAM_ERROR} when no host exists for the given id
     */
    @Override
    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
        final Host targetHost = _resourceService.getHost(getHostId());
        if (targetHost == null) {
            throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find host by ID: " + getHostId());
        }

        // The provider is looked up by the lower-cased simple class name.
        final String providerName = SimulatorHAProvider.class.getSimpleName().toLowerCase();
        final SimulatorHAProvider provider = (SimulatorHAProvider) haManager.getHAProvider(providerName);
        final List<SimulatorHAStateResponse> transitions = (provider == null)
                ? new ArrayList<SimulatorHAStateResponse>()
                : provider.listHAStateTransitions(targetHost.getId());

        final ListResponse<SimulatorHAStateResponse> listResponse = new ListResponse<>();
        listResponse.setResponses(transitions);
        listResponse.setResponseName(getCommandName());
        listResponse.setObjectName("simulatorhastatetransition");
        setResponseObject(listResponse);
    }

    /**
     * @return the lower-cased API name followed by the standard response suffix
     */
    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * @return the id of the account making the call
     */
    @Override
    public long getEntityOwnerId() {
        return CallContext.current().getCallingAccountId();
    }
}

View File

@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.api.response;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.ha.HAConfig;
/**
 * API response describing one HA state transition recorded by the simulator
 * HA provider: the new and previous state, the triggering event and the
 * activity/recovery counters at that time.
 *
 * State and event values are serialized as lower-cased strings. Every setter
 * that takes an enum ignores {@code null} and leaves the current value
 * untouched.
 */
public class SimulatorHAStateResponse extends BaseResponse {
    @SerializedName(ApiConstants.HA_STATE) @Param(description="the ha state")
    private String haState;

    @SerializedName("prevhastate") @Param(description="the previous ha state")
    private String previousHaState;

    @SerializedName("event") @Param(description="the event that caused state transition")
    private String haEvent;

    @SerializedName("activitycounter") @Param(description="the activity counter")
    private Long activityCounter;

    @SerializedName("recoverycounter") @Param(description="the recovery counter")
    private Long recoveryCounter;

    /**
     * @param haState the state reached by the transition; ignored when {@code null}
     */
    public void setHaState(final HAConfig.HAState haState) {
        if (haState != null) {
            this.haState = haState.toString().toLowerCase();
        }
    }

    /**
     * @param previousHaState the state left by the transition; ignored when {@code null}
     */
    public void setPreviousHaState(final HAConfig.HAState previousHaState) {
        if (previousHaState != null) {
            this.previousHaState = previousHaState.toString().toLowerCase();
        }
    }

    /**
     * @param haEvent the event that caused the transition; ignored when {@code null}
     */
    public void setHaEvent(final HAConfig.Event haEvent) {
        // Guarded like the two state setters so a missing event cannot raise a NullPointerException.
        if (haEvent != null) {
            this.haEvent = haEvent.toString().toLowerCase();
        }
    }

    /**
     * @param activityCounter the activity counter value to report
     */
    public void setActivityCounter(Long activityCounter) {
        this.activityCounter = activityCounter;
    }

    /**
     * @param recoveryCounter the recovery counter value to report
     */
    public void setRecoveryCounter(Long recoveryCounter) {
        this.recoveryCounter = recoveryCounter;
    }
}

View File

@ -21,6 +21,7 @@ import java.util.List;
import javax.inject.Inject;
import org.apache.log4j.Logger;
import org.apache.cloudstack.ha.HAManager;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
@ -48,6 +49,8 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator {
ResourceManager _resourceMgr;
@Inject
MockConfigurationDao _mockConfigDao;
@Inject
private HAManager haManager;
protected SimulatorInvestigator() {
}
@ -58,6 +61,10 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator {
return null;
}
if (haManager.isHAEligible(agent)) {
return haManager.getHostStatus(agent);
}
CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
List<HostVO> neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
for (HostVO neighbor : neighbors) {
@ -79,6 +86,9 @@ public class SimulatorInvestigator extends AdapterBase implements Investigator {
@Override
public boolean isVmAlive(VirtualMachine vm, Host host) throws UnknownVM {
if (haManager.isHAEligible(host)) {
return haManager.isVMAliveOnHost(host);
}
CheckVirtualMachineCommand cmd = new CheckVirtualMachineCommand(vm.getInstanceName());
try {
Answer answer = _agentMgr.send(vm.getHostId(), cmd);

View File

@ -0,0 +1,152 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.api.response.SimulatorHAStateResponse;
import com.cloud.host.Host;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.utils.fsm.StateListener;
import com.cloud.utils.fsm.StateMachine2;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAFenceException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
import org.apache.cloudstack.ha.provider.host.HAAbstractHostProvider;
import org.joda.time.DateTime;
import javax.inject.Inject;
import java.security.InvalidParameterException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
public class SimulatorHAProvider extends HAAbstractHostProvider implements HAProvider<Host>, StateListener<HAConfig.HAState, HAConfig.Event, HAConfig> {
@Inject
private HAManager haManager;
private final Map<Long, SimulatorHAState> hostHAStateMap = new ConcurrentHashMap<>();
public SimulatorHAProvider() {
HAConfig.HAState.getStateMachine().registerListener(this);
}
public void setHAStateForHost(final Long hostId, final SimulatorHAState state) {
hostHAStateMap.put(hostId, state);
haManager.purgeHACounter(hostId, HAResource.ResourceType.Host);
}
public List<SimulatorHAStateResponse> listHAStateTransitions(final Long hostId) {
final SimulatorHAState haState = hostHAStateMap.get(hostId);
if (haState == null) {
return Collections.emptyList();
}
return haState.listRecentStateTransitions();
}
@Override
public HAResource.ResourceType resourceType() {
return HAResource.ResourceType.Host;
}
@Override
public HAResource.ResourceSubType resourceSubType() {
return HAResource.ResourceSubType.Simulator;
}
@Override
public boolean isEligible(final Host host) {
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return !isInMaintenanceMode(host) && !isDisabled(host) && haState != null
&& Hypervisor.HypervisorType.Simulator.equals(host.getHypervisorType());
}
@Override
public boolean isHealthy(final Host host) throws HACheckerException {
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return haState != null && haState.isHealthy();
}
@Override
public boolean hasActivity(final Host host, final DateTime afterThis) throws HACheckerException {
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return haState != null && haState.hasActivity();
}
@Override
public boolean recover(final Host host) throws HARecoveryException {
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return haState != null && haState.canRecover();
}
@Override
public boolean fence(final Host host) throws HAFenceException {
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return haState != null && haState.canFenced();
}
@Override
public Object getConfigValue(final HAProvider.HAProviderConfig name, final Host host) {
switch (name) {
case HealthCheckTimeout:
return 5L;
case ActivityCheckTimeout:
return 5L;
case RecoveryTimeout:
return 5L;
case FenceTimeout:
return 5L;
case MaxActivityCheckInterval:
return 1L;
case MaxActivityChecks:
return 3L;
case ActivityCheckFailureRatio:
final SimulatorHAState haState = hostHAStateMap.get(host.getId());
return (haState != null && haState.hasActivity()) ? 1.0 : 0.0;
case MaxDegradedWaitTimeout:
return 1L;
case MaxRecoveryAttempts:
return 2L;
case RecoveryWaitTimeout:
return 1L;
default:
throw new InvalidParameterException("Unknown HAProviderConfig " + name.toString());
}
}
@Override
public boolean preStateTransitionEvent(final HAConfig.HAState oldState, final HAConfig.Event event,
final HAConfig.HAState newState, final HAConfig vo, final boolean status, final Object opaque) {
return false;
}
@Override
public boolean postStateTransitionEvent(final StateMachine2.Transition<HAConfig.HAState, HAConfig.Event> transition,
final HAConfig vo, final boolean status, final Object opaque) {
if (vo.getResourceType() != HAResource.ResourceType.Host) {
return false;
}
final SimulatorHAState haState = hostHAStateMap.get(vo.getResourceId());
if (haState == null || !status) {
return false;
}
final HAResourceCounter counter = haManager.getHACounter(vo.getResourceId(), vo.getResourceType());
return haState.addStateTransition(transition.getToState(), transition.getCurrentState(), transition.getEvent(), counter);
}
}

View File

@ -0,0 +1,89 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.api.response.SimulatorHAStateResponse;
import com.google.common.collect.EvictingQueue;
import java.util.ArrayList;
import java.util.List;
import java.util.Queue;
/**
 * Simulated HA behaviour of one host: four flags answering the provider's
 * health, activity, recovery and fence questions, plus a bounded history of
 * the HA state transitions recorded for the host.
 */
public final class SimulatorHAState {

    private boolean healthy;
    private boolean activity;
    private boolean recover;
    private boolean fence;

    /** Transition history, created as an evicting queue with a capacity of 100 entries. */
    private Queue<SimulatorHAStateResponse> stateTransitions = EvictingQueue.create(100);

    /**
     * @param healthy value to report for health checks
     * @param activity value to report for activity checks
     * @param recover value to report for recovery attempts
     * @param fence value to report for fence attempts
     */
    public SimulatorHAState(boolean healthy, boolean activity, boolean recover, boolean fence) {
        setHealthyInternal(healthy);
        this.activity = activity;
        this.recover = recover;
        this.fence = fence;
    }

    /** @return the configured health flag */
    public boolean isHealthy() {
        return this.healthy;
    }

    /** @param healthy new health flag */
    public void setHealthy(boolean healthy) {
        setHealthyInternal(healthy);
    }

    /** @return the configured activity flag */
    public boolean hasActivity() {
        return this.activity;
    }

    /** @param activity new activity flag */
    public void setActivity(boolean activity) {
        this.activity = activity;
    }

    /** @return the configured recovery flag */
    public boolean canRecover() {
        return this.recover;
    }

    /** @param recover new recovery flag */
    public void setRecover(boolean recover) {
        this.recover = recover;
    }

    /** @return the configured fence flag */
    public boolean canFenced() {
        return this.fence;
    }

    /** @param fence new fence flag */
    public void setFence(boolean fence) {
        this.fence = fence;
    }

    /**
     * Appends a transition record to the history.
     *
     * @param newHaState state reached by the transition
     * @param oldHaState state left by the transition
     * @param event event that caused the transition
     * @param counter HA counter of the resource; its values are copied only when non-null
     * @return the result of adding the record to the history queue
     */
    public boolean addStateTransition(final HAConfig.HAState newHaState, final HAConfig.HAState oldHaState, final HAConfig.Event event, final HAResourceCounter counter) {
        final SimulatorHAStateResponse record = new SimulatorHAStateResponse();
        record.setHaState(newHaState);
        record.setPreviousHaState(oldHaState);
        record.setHaEvent(event);
        if (counter != null) {
            record.setActivityCounter(counter.getActivityCheckCounter());
            record.setRecoveryCounter(counter.getRecoveryCounter());
        }
        record.setObjectName("hastatetransition");
        return stateTransitions.add(record);
    }

    /** @return a new list holding a copy of the recorded transitions */
    public List<SimulatorHAStateResponse> listRecentStateTransitions() {
        final List<SimulatorHAStateResponse> snapshot = new ArrayList<>(stateTransitions);
        return snapshot;
    }

    /** Single assignment point for the health flag, shared by constructor and setter. */
    private void setHealthyInternal(final boolean value) {
        this.healthy = value;
    }
}

View File

@ -0,0 +1,134 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Prints the usage text to stdout and terminates the script with status 1.
# The script name is passed as a printf argument rather than embedded in the
# format string, so a '%' or backslash in $0 is printed literally.
help() {
  printf 'Usage: %s
-i nfs server ip
-p nfs server path
-m mount point
-h host
-u volume uuid list
-t time on ms
-d suspect time\n' "$0"
  exit 1
}
#set -x

# Option values, filled in by getopts below.
NfsSvrIP=
NfsSvrPath=   # parsed from -p; not referenced again in this script
MountPoint=
HostIP=
UUIDList=
MSTime=
SuspectTime=

while getopts 'i:p:m:u:t:h:d:' OPTION
do
  case $OPTION in
  i)
     NfsSvrIP="$OPTARG"
     ;;
  p)
     NfsSvrPath="$OPTARG"
     ;;
  m)
     MountPoint="$OPTARG"
     ;;
  h)
     HostIP="$OPTARG"
     ;;
  u)
     UUIDList="$OPTARG"
     ;;
  t)
     MSTime="$OPTARG"
     ;;
  d)
     SuspectTime="$OPTARG"
     ;;
  *)
     help
     ;;
  esac
done

# The NFS server ip and the suspect time are mandatory.
if [ -z "$NfsSvrIP" ]
then
   exit 2
fi

if [ -z "$SuspectTime" ]
then
   exit 2
fi

hbFile="$MountPoint/KVMHA/hb-$HostIP"
acFile="$MountPoint/KVMHA/ac-$HostIP"

# First check: heartbeat file
# The host is reported alive when the value stored in the heartbeat file is
# less than 61 seconds behind the current epoch time.
# NOTE(review): presumably the file holds an epoch-seconds timestamp - confirm against the writer.
now=$(date +%s)
hb=$(cat "$hbFile")
# $now and $hb are deliberately left unquoted so surrounding whitespace in the
# file content is dropped before expr sees it.
diff=$(expr $now - $hb)
if [ "$diff" -lt 61 ]
then
    echo "=====> ALIVE <====="
    exit 0
fi

# Without a volume list there is nothing else to inspect.
if [ -z "$UUIDList" ]
then
    echo "=====> DEAD <======"
    exit 0
fi

# Second check: disk activity check
# Take the newest modification time among the listed volumes (comma separated,
# resolved relative to the mount point).
cd "$MountPoint"
latestUpdateTime=$(stat -c %Y $(echo $UUIDList | sed 's/,/ /g') | sort -nr | head -1)

if [ ! -f "$acFile" ]; then
    # First activity check for this host: record "suspect:latest:ms" and
    # compare the newest modification time with the suspect time.
    echo "$SuspectTime:$latestUpdateTime:$MSTime" > "$acFile"

    if [[ $latestUpdateTime -gt $SuspectTime ]]; then
        echo "=====> ALIVE <====="
    else
        echo "=====> DEAD <======"
    fi
else
    # Read the previous record before overwriting it with the current one.
    acTime=$(cat "$acFile")
    arrTime=(${acTime//:/ })
    lastSuspectTime=${arrTime[0]}
    lastUpdateTime=${arrTime[1]}
    echo "$SuspectTime:$latestUpdateTime:$MSTime" > "$acFile"

    if [[ $lastSuspectTime -ne $SuspectTime ]]; then
        # New suspect time: judge against the suspect time itself.
        if [[ $latestUpdateTime -gt $SuspectTime ]]; then
            echo "=====> ALIVE <====="
        else
            echo "=====> DEAD <======"
        fi
    else
        # Same suspect time as last run: alive only if a volume was modified
        # since the previously recorded newest modification time.
        if [[ $latestUpdateTime -gt $lastUpdateTime ]]; then
            echo "=====> ALIVE <====="
        else
            echo "=====> DEAD <======"
        fi
    fi
fi

# The verdict is conveyed by the printed line; the exit status is 0 either way.
exit 0

View File

@ -74,6 +74,11 @@
<bean id="bgPollManager" class="org.apache.cloudstack.poll.BackgroundPollManagerImpl">
</bean>
<!-- the new HA manager -->
<bean id="haManagerImpl" class="org.apache.cloudstack.ha.HAManagerImpl">
<property name="haProviders" value="#{haProvidersRegistry.registered}" />
</bean>
<bean id="highAvailabilityManagerExtImpl" class="com.cloud.ha.HighAvailabilityManagerExtImpl">
<property name="investigators" value="#{haInvestigatorsRegistry.registered}" />
<property name="fenceBuilders" value="#{haFenceBuildersRegistry.registered}" />

View File

@ -760,6 +760,7 @@ public class AlertManagerImpl extends ManagerBase implements AlertManager, Confi
(alertType != AlertManager.AlertType.ALERT_TYPE_RESOURCE_LIMIT_EXCEEDED) &&
(alertType != AlertManager.AlertType.ALERT_TYPE_UPLOAD_FAILED) &&
(alertType != AlertManager.AlertType.ALERT_TYPE_OOBM_AUTH_ERROR) &&
(alertType != AlertManager.AlertType.ALERT_TYPE_HA_ACTION) &&
(alertType != AlertManager.AlertType.ALERT_TYPE_CA_CERT)) {
alert = _alertDao.getLastAlert(alertType.getType(), dataCenterId, podId, clusterId);
}

View File

@ -35,6 +35,8 @@ import org.apache.cloudstack.api.response.HostForMigrationResponse;
import org.apache.cloudstack.api.response.HostResponse;
import org.apache.cloudstack.api.response.VgpuResponse;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.dao.HAConfigDao;
import org.apache.cloudstack.outofbandmanagement.dao.OutOfBandManagementDao;
import com.cloud.api.ApiDBUtils;
@ -59,6 +61,8 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
@Inject
private HostDetailsDao hostDetailsDao;
@Inject
private HAConfigDao haConfigDao;
@Inject
private OutOfBandManagementDao outOfBandManagementDao;
private final SearchBuilder<HostJoinVO> hostSearch;
@ -231,6 +235,7 @@ public class HostJoinDaoImpl extends GenericDaoBase<HostJoinVO, Long> implements
}
}
hostResponse.setHostHAResponse(haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host));
hostResponse.setOutOfBandManagementResponse(outOfBandManagementDao.findByHost(host.getId()));
hostResponse.setResourceState(host.getResourceState().toString());

View File

@ -36,6 +36,7 @@ import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.org.Cluster;
import com.cloud.resource.ResourceState;
import com.cloud.utils.db.GenericDao;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.outofbandmanagement.OutOfBandManagement;
/**
@ -99,6 +100,15 @@ public class HostJoinVO extends BaseViewVO implements InternalIdentity, Identity
@Enumerated(value = EnumType.STRING)
private OutOfBandManagement.PowerState outOfBandManagementPowerState;
@Column(name = "ha_enabled")
private boolean hostHAEnabled = false;
@Column(name = "ha_state")
private HAConfig.HAState hostHAState;
@Column(name = "ha_provider")
private String hostHAProvider;
@Column(name = "resource_state")
@Enumerated(value = EnumType.STRING)
private ResourceState resourceState;
@ -260,6 +270,18 @@ public class HostJoinVO extends BaseViewVO implements InternalIdentity, Identity
return outOfBandManagementPowerState;
}
/**
 * @return the value mapped from the {@code ha_enabled} column
 */
public boolean isHostHAEnabled() {
    return this.hostHAEnabled;
}
/**
 * @return the value mapped from the {@code ha_state} column
 */
public HAConfig.HAState getHostHAState() {
    return this.hostHAState;
}
/**
 * @return the value mapped from the {@code ha_provider} column
 */
public String getHostHAProvider() {
    return this.hostHAProvider;
}
/**
 * @return the value mapped from the {@code resource_state} column
 */
public ResourceState getResourceState() {
    return this.resourceState;
}

View File

@ -1032,7 +1032,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C
Command[] cmdArray = cmds.toCommands();
for (Command cmd : cmdArray) {
long targetHostId = _hvGuruMgr.getGuruProcessedCommandTargetHost(hostId, cmd);
answers.add(_agentMgr.send(targetHostId, cmd));
}
return new Pair<Long, Answer[]>(hostId, answers.toArray(new Answer[answers.size()]));
@ -2347,8 +2346,6 @@ public class StorageManagerImpl extends ManagerBase implements StorageManager, C
" for template id " +templateOnImageStore.getTemplateId(), th);
}
}
}
// get bytesReadRate from service_offering, disk_offering and vm.disk.throttling.bytes_read_rate

View File

@ -0,0 +1,77 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.ha.Investigator;
import com.cloud.host.Host;
import com.cloud.host.Status;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.ha.provider.HAProvider;
/**
 * Contract of the HA manager: exposes the global settings that size the HA worker
 * pools and queues, drives HA state transitions and gives access to per-resource
 * HA counters and providers.
 */
public interface HAManager extends HAConfigManager {

    /** Number of worker threads that consume the health check queue. */
    ConfigKey<Integer> MaxConcurrentHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.concurrent.health.check.operations",
            "50",
            "The number of concurrent health check operations per management server. This setting determines the size of the thread pool consuming the HEALTH CHECK queue.", true);

    /** Capacity of the health check queue. */
    ConfigKey<Integer> MaxPendingHealthCheckOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.pending.health.check.operations",
            "5000",
            "The number of pending health check operations per management server. This setting determines the size of the HEALTH CHECK queue.", true);

    /** Number of worker threads that consume the activity check queue. */
    ConfigKey<Integer> MaxConcurrentActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.concurrent.activity.check.operations",
            "25",
            "The number of concurrent activity check operations per management server. This setting determines the size of the thread pool consuming the ACTIVITY CHECK queue.",
            true);

    /** Capacity of the activity check queue. */
    ConfigKey<Integer> MaxPendingActivityCheckOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.pending.activity.check.operations",
            "2500",
            "The number of pending activity check operations per management server. This setting determines the size of the ACTIVITY CHECK queue.", true);

    /** Number of worker threads that run recovery operations. */
    ConfigKey<Integer> MaxConcurrentRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.concurrent.recovery.operations",
            "25",
            "The number of concurrent recovery operations per management server.", true);

    /** Capacity of the recovery queue. */
    ConfigKey<Integer> MaxPendingRecoveryOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.pending.recovery.operations",
            "2500",
            "The number of pending recovery operations per management server. This setting determines the size of the RECOVERY queue.", true);

    /** Number of worker threads that run fence operations. */
    ConfigKey<Integer> MaxConcurrentFenceOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.concurrent.fence.operations",
            "25",
            "The number of concurrent fence operations per management server.", true);

    /** Capacity of the fence queue. */
    ConfigKey<Integer> MaxPendingFenceOperations = new ConfigKey<>("Advanced", Integer.class,
            "ha.max.pending.fence.operations",
            "2500",
            "The number of pending fence operations per management server. This setting determines the size of the FENCE queue.", true);

    /**
     * Applies an event to the HA state machine of the given HA config.
     *
     * @param event    the event to fire
     * @param haConfig the HA config whose state should change
     * @return {@code true} when the state was transitioned
     */
    boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig);

    /**
     * @param name the key a provider is registered under
     * @return the HA provider registered under {@code name}
     */
    HAProvider getHAProvider(final String name);

    /**
     * @param resourceId   id of the HA resource
     * @param resourceType type of the HA resource
     * @return the counter tracked for the given resource
     */
    HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType);

    /**
     * Drops the counter tracked for the given resource.
     *
     * @param resourceId   id of the HA resource
     * @param resourceType type of the HA resource
     */
    void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType);

    /**
     * @param resource the resource to test
     * @return {@code true} when the resource is eligible for HA
     */
    boolean isHAEligible(final HAResource resource);

    /**
     * @param host the host to inspect
     * @return whether VMs on the host are considered alive
     * @throws Investigator.UnknownVM when no answer can be given for the host
     */
    Boolean isVMAliveOnHost(final Host host) throws Investigator.UnknownVM;

    /**
     * @param host the host to inspect
     * @return the host status as seen by the HA framework
     */
    Status getHostStatus(final Host host);
}

View File

@ -0,0 +1,744 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ManagementServerHost;
import com.cloud.dc.ClusterDetailsDao;
import com.cloud.dc.ClusterDetailsVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DataCenterDetailVO;
import com.cloud.dc.dao.DataCenterDetailsDao;
import com.cloud.domain.Domain;
import com.cloud.event.ActionEvent;
import com.cloud.event.ActionEventUtils;
import com.cloud.event.EventTypes;
import com.cloud.ha.Investigator;
import com.cloud.host.Host;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.org.Cluster;
import com.cloud.utils.component.ComponentContext;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.component.PluggableService;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallback;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.NoTransitionException;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.command.admin.ha.ConfigureHAForHostCmd;
import org.apache.cloudstack.api.command.admin.ha.DisableHAForClusterCmd;
import org.apache.cloudstack.api.command.admin.ha.DisableHAForHostCmd;
import org.apache.cloudstack.api.command.admin.ha.DisableHAForZoneCmd;
import org.apache.cloudstack.api.command.admin.ha.EnableHAForClusterCmd;
import org.apache.cloudstack.api.command.admin.ha.EnableHAForHostCmd;
import org.apache.cloudstack.api.command.admin.ha.EnableHAForZoneCmd;
import org.apache.cloudstack.api.command.admin.ha.ListHostHAProvidersCmd;
import org.apache.cloudstack.api.command.admin.ha.ListHostHAResourcesCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.ha.dao.HAConfigDao;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig;
import org.apache.cloudstack.ha.task.ActivityCheckTask;
import org.apache.cloudstack.ha.task.FenceTask;
import org.apache.cloudstack.ha.task.HealthCheckTask;
import org.apache.cloudstack.ha.task.RecoveryTask;
import org.apache.cloudstack.kernel.Partition;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.poll.BackgroundPollManager;
import org.apache.cloudstack.poll.BackgroundPollTask;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.log4j.Logger;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Default {@link HAManager} implementation: keeps the registry of HA providers,
 * the per-resource HA counters and the executors used by the HA poll tasks.
 */
public final class HAManagerImpl extends ManagerBase implements HAManager, ClusterManagerListener, PluggableService, Configurable {
public static final Logger LOG = Logger.getLogger(HAManagerImpl.class);
// Injected collaborators.
@Inject
private HAConfigDao haConfigDao;
@Inject
private HostDao hostDao;
@Inject
private ClusterDetailsDao clusterDetailsDao;
@Inject
private DataCenterDetailsDao dataCenterDetailsDao;
@Inject
private BackgroundPollManager pollManager;
// Provider list supplied through setHaProviders(); indexed into haProviderMap by start().
private List<HAProvider<HAResource>> haProviders;
// Providers keyed by their lower-cased simple class name (populated in start()).
private Map<String, HAProvider<HAResource>> haProviderMap = new HashMap<>();
// Worker pools, (re)created by configure(); static, so shared by every instance of this class.
private static ExecutorService healthCheckExecutor;
private static ExecutorService activityCheckExecutor;
private static ExecutorService recoveryExecutor;
private static ExecutorService fenceExecutor;
// Name of the zone/cluster detail that stores whether HA is enabled for that partition.
private static final String HA_ENABLED_DETAIL = "resourceHAEnabled";
//////////////////////////////////////////////////////
//////////////// HA Manager methods //////////////////
//////////////////////////////////////////////////////
// Counters keyed by resourceCounterKey(resourceId, resourceType).
public Map<String, HAResourceCounter> haCounterMap = new ConcurrentHashMap<>();
/**
 * Looks up an HA provider in the provider map.
 *
 * @param name the key the provider is stored under
 * @return the provider, or {@code null} when the map has no entry for {@code name}
 */
public HAProvider<HAResource> getHAProvider(final String name) {
    final HAProvider<HAResource> provider = haProviderMap.get(name);
    return provider;
}
/**
 * Builds the counter-map key for a resource: the id immediately followed by the type.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 * @return the concatenated key
 */
private String resourceCounterKey(final Long resourceId, final HAResource.ResourceType resourceType) {
    final StringBuilder key = new StringBuilder();
    key.append(resourceId.toString());
    key.append(resourceType.toString());
    return key.toString();
}
/**
 * Returns the counter of a resource, creating and registering a fresh one on first use.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 * @return the counter stored for the resource
 */
public synchronized HAResourceCounter getHACounter(final Long resourceId, final HAResource.ResourceType resourceType) {
    final String counterKey = resourceCounterKey(resourceId, resourceType);
    HAResourceCounter counter = haCounterMap.get(counterKey);
    if (counter == null) {
        counter = new HAResourceCounter();
        haCounterMap.put(counterKey, counter);
    }
    return counter;
}
/**
 * Removes the counter stored for a resource; does nothing when none is stored.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 */
public synchronized void purgeHACounter(final Long resourceId, final HAResource.ResourceType resourceType) {
    // Map.remove is already a no-op for an absent key.
    haCounterMap.remove(resourceCounterKey(resourceId, resourceType));
}
/**
 * Fires {@code event} on the HA state machine for {@code haConfig}.
 * A successful transition is logged at debug level; when the next state is
 * Recovering, Fencing or Fenced an HA state transition action event is published too.
 *
 * @param event    event to apply; {@code null} yields {@code false}
 * @param haConfig HA config to transition; {@code null} yields {@code false}
 * @return {@code true} when the state machine performed the transition, {@code false}
 *         otherwise (including when no transition exists for the current state and event)
 */
public boolean transitionHAState(final HAConfig.Event event, final HAConfig haConfig) {
    if (event == null || haConfig == null) {
        return false;
    }
    final HAConfig.HAState fromState = haConfig.getState();
    try {
        // Resolve the target state before transitTo() so it can be reported afterwards.
        final HAConfig.HAState toState = HAConfig.HAState.getStateMachine().getNextState(fromState, event);
        final boolean transitioned = HAConfig.HAState.getStateMachine().transitTo(haConfig, event, null, haConfigDao);
        if (!transitioned) {
            return false;
        }
        final String message = String.format("Transitioned host HA state from:%s to:%s due to event:%s for the host id:%d",
                fromState, toState, event, haConfig.getResourceId());
        LOG.debug(message);
        final boolean publishEvent = toState == HAConfig.HAState.Recovering
                || toState == HAConfig.HAState.Fencing
                || toState == HAConfig.HAState.Fenced;
        if (publishEvent) {
            ActionEventUtils.onActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(),
                    Domain.ROOT_DOMAIN, EventTypes.EVENT_HA_STATE_TRANSITION, message);
        }
        return true;
    } catch (NoTransitionException e) {
        // No transition is defined for this state/event pair; only traced.
        if (LOG.isTraceEnabled()) {
            LOG.trace("Unable to find next HA state for current HA state: " + fromState + " for event: " + event + " for host" + haConfig.getResourceId());
        }
        return false;
    }
}
/**
 * Fires the Disabled event for every host of the given cluster or zone.
 * Every host is attempted, even after an earlier one failed to transition.
 *
 * @param partition cluster or zone whose hosts should be disabled; any other
 *                  partition type is a no-op
 * @return {@code true} when every host transitioned (or the partition type is not
 *         handled), {@code false} when at least one transition returned {@code false}
 */
private boolean transitionResourceStateToDisabled(final Partition partition) {
    final List<? extends HAResource> resources;
    if (partition.partitionType() == Partition.PartitionType.Cluster) {
        resources = hostDao.findByClusterId(partition.getId());
    } else if (partition.partitionType() == Partition.PartitionType.Zone) {
        resources = hostDao.findByDataCenterId(partition.getId());
    } else {
        return true;
    }
    boolean allDisabled = true;
    for (final HAResource resource : resources) {
        final HAConfig haConfig = haConfigDao.findHAResource(resource.getId(), resource.resourceType());
        // Run the transition before folding it into the result: "allDisabled && transition(...)"
        // would short-circuit and skip every host after the first failure.
        final boolean disabled = transitionHAState(HAConfig.Event.Disabled, haConfig);
        allDisabled = allDisabled && disabled;
    }
    return allDisabled;
}
/**
 * Tells whether this management server may act on the given HA config.
 *
 * @param haConfig the HA config to check
 * @return {@code true} when the config has no owning management server id or the id
 *         equals this node's id; {@code false} when it names a different server
 */
private boolean checkHAOwnership(final HAConfig haConfig) {
    if (haConfig.getManagementServerId() == null) {
        return true;
    }
    return haConfig.getManagementServerId() == ManagementServerNode.getManagementServerId();
}
/**
 * Resolves the resource behind an HA config and keeps the config's
 * Disabled/Enabled state in line with the host, zone and cluster settings.
 *
 * @param haConfig the HA config being processed
 * @return the resource to work on, or {@code null} when the host is removed or missing,
 *         or HA is switched off for the resource, its zone or its cluster
 */
private HAResource validateAndFindHAResource(final HAConfig haConfig) {
    HAResource resource = null;
    if (haConfig.getResourceType() == HAResource.ResourceType.Host) {
        final Host host = hostDao.findById(haConfig.getResourceId());
        if (host == null) {
            // The host row is gone: disable HA for it unless that already happened.
            if (haConfig.getState() != HAConfig.HAState.Disabled) {
                disableHA(haConfig.getResourceId(), haConfig.getResourceType());
                return null;
            }
        } else if (host.getRemoved() != null) {
            return null;
        }
        resource = host;
    }
    final boolean haSwitchedOn = haConfig.isEnabled() && isHAEnabledForZone(resource) && isHAEnabledForCluster(resource);
    if (!haSwitchedOn) {
        if (haConfig.getState() != HAConfig.HAState.Disabled && transitionHAState(HAConfig.Event.Disabled, haConfig)) {
            purgeHACounter(haConfig.getResourceId(), haConfig.getResourceType());
        }
        return null;
    }
    if (haConfig.getState() == HAConfig.HAState.Disabled) {
        transitionHAState(HAConfig.Event.Enabled, haConfig);
    }
    return resource;
}
/**
 * Resolves the provider configured for an HA config and keeps the config's
 * Ineligible/Eligible state in line with the provider's eligibility answer.
 *
 * @param haConfig the HA config being processed
 * @param resource the resource the config belongs to
 * @return the provider, or {@code null} when the provider reports the resource as not
 *         eligible or when no provider is registered under the configured name
 */
private HAProvider<HAResource> validateAndFindHAProvider(final HAConfig haConfig, final HAResource resource) {
    final HAProvider<HAResource> provider = haProviderMap.get(haConfig.getHaProvider());
    if (provider != null && !provider.isEligible(resource)) {
        if (haConfig.getState() != HAConfig.HAState.Ineligible) {
            transitionHAState(HAConfig.Event.Ineligible, haConfig);
        }
        return null;
    }
    if (haConfig.getState() == HAConfig.HAState.Ineligible) {
        transitionHAState(HAConfig.Event.Eligible, haConfig);
    }
    return provider;
}
/**
 * Reads the zone-level HA switch for the zone a resource lives in.
 *
 * @param resource the resource to check
 * @return {@code true} when the resource is {@code null}, has a zone id below 1, the zone
 *         has no (or an empty) HA detail, or the detail parses to {@code true}
 */
public boolean isHAEnabledForZone(final HAResource resource) {
    if (resource == null || resource.getDataCenterId() < 1L) {
        return true;
    }
    final DataCenterDetailVO detail = dataCenterDetailsDao.findDetail(resource.getDataCenterId(), HA_ENABLED_DETAIL);
    if (detail == null || Strings.isNullOrEmpty(detail.getValue())) {
        // Nothing recorded for the zone counts as enabled.
        return true;
    }
    return Boolean.parseBoolean(detail.getValue());
}
/**
 * Reads the cluster-level HA switch for the cluster a resource lives in.
 *
 * @param resource the resource to check
 * @return {@code true} when the resource is {@code null}, has no cluster id, the cluster
 *         has no (or an empty) HA detail, or the detail parses to {@code true}
 */
private boolean isHAEnabledForCluster(final HAResource resource) {
    if (resource == null || resource.getClusterId() == null) {
        return true;
    }
    final ClusterDetailsVO detail = clusterDetailsDao.findDetail(resource.getClusterId(), HA_ENABLED_DETAIL);
    if (detail == null || Strings.isNullOrEmpty(detail.getValue())) {
        // Nothing recorded for the cluster counts as enabled.
        return true;
    }
    return Boolean.parseBoolean(detail.getValue());
}
/**
 * Checks the HA config of a single resource.
 *
 * @param resource the resource to check; only {@link Host} instances are supported
 * @return {@code true} when the resource is a host with a positive id whose HA config
 *         exists, is enabled and is neither Disabled nor Ineligible
 */
private boolean isHAEligibleForResource(final HAResource resource) {
    if (resource == null || resource.getId() < 1L) {
        return false;
    }
    if (!(resource instanceof Host)) {
        return false;
    }
    final HAConfig haConfig = haConfigDao.findHAResource(resource.getId(), HAResource.ResourceType.Host);
    if (haConfig == null || !haConfig.isEnabled()) {
        return false;
    }
    return haConfig.getState() != HAConfig.HAState.Disabled
            && haConfig.getState() != HAConfig.HAState.Ineligible;
}
/**
 * Combines the zone switch, the cluster switch and the resource's own HA config.
 *
 * @param resource the resource to check
 * @return {@code true} only when the resource is non-null and all three checks pass
 */
public boolean isHAEligible(final HAResource resource) {
    if (resource == null) {
        return false;
    }
    if (!isHAEnabledForZone(resource)) {
        return false;
    }
    if (!isHAEnabledForCluster(resource)) {
        return false;
    }
    return isHAEligibleForResource(resource);
}
/**
 * Verifies that an HA provider can serve the given resource. For hosts the host's
 * hypervisor type must equal the provider's resource sub type (compared by their
 * string forms); other resource types are not checked.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 * @param haProvider   the provider to validate against the resource
 * @throws ServerApiException with {@code PARAM_ERROR} when the host does not exist or
 *                            the provider does not match the host's hypervisor type
 */
public void validateHAProviderConfigForResource(final Long resourceId, final HAResource.ResourceType resourceType, final HAProvider<HAResource> haProvider) {
    if (!HAResource.ResourceType.Host.equals(resourceType)) {
        return;
    }
    final Host host = hostDao.findById(resourceId);
    if (host == null) {
        // Report an unknown host id as a parameter error instead of failing with a NullPointerException.
        throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Unable to find the host with id:" + resourceId);
    }
    if (host.getHypervisorType() == null || haProvider.resourceSubType() == null || !host.getHypervisorType().toString().equals(haProvider.resourceSubType().toString())) {
        throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "Incompatible haprovider provided for the resource of hypervisor type:" + host.getHypervisorType());
    }
}
////////////////////////////////////////////////////////////////////
//////////////// HA Investigator wrapper for Old HA ////////////////
////////////////////////////////////////////////////////////////////
/**
 * Answers the legacy investigator from the host's HA state.
 *
 * @param host the host to inspect
 * @return {@code false} when the host's HA state is Fenced, {@code true} for any other state
 * @throws Investigator.UnknownVM when the host has no HA config
 */
public Boolean isVMAliveOnHost(final Host host) throws Investigator.UnknownVM {
    final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host);
    if (haConfig == null) {
        throw new Investigator.UnknownVM();
    }
    final boolean fenced = haConfig.getState() == HAConfig.HAState.Fenced;
    if (LOG.isDebugEnabled()) {
        if (fenced) {
            LOG.debug("HA: Host is fenced " + host.getId());
        } else {
            LOG.debug("HA: HOST is alive " + host.getId());
        }
    }
    return !fenced;
}
/**
 * Maps the host's HA state to a host {@link Status}.
 *
 * @param host the host to inspect
 * @return {@code Unknown} when the host has no HA config, {@code Down} when it is Fenced,
 *         {@code Disconnected} when it is Degraded, Recovering, Recovered or Fencing,
 *         and {@code Up} for every other HA state
 */
public Status getHostStatus(final Host host) {
    final HAConfig haConfig = haConfigDao.findHAResource(host.getId(), HAResource.ResourceType.Host);
    if (haConfig == null) {
        return Status.Unknown;
    }
    final HAConfig.HAState state = haConfig.getState();
    if (state == HAConfig.HAState.Fenced) {
        // The message used here previously claimed the agent was Up while Down was returned.
        if (LOG.isDebugEnabled()) {
            LOG.debug("HA: Host is fenced, agent is down " + host.getId());
        }
        return Status.Down;
    }
    if (state == HAConfig.HAState.Degraded || state == HAConfig.HAState.Recovering
            || state == HAConfig.HAState.Recovered || state == HAConfig.HAState.Fencing) {
        if (LOG.isDebugEnabled()) {
            LOG.debug("HA: Agent is disconnected " + host.getId());
        }
        return Status.Disconnected;
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("HA: Agent is available/suspect/checking Up " + host.getId());
    }
    return Status.Up;
}
//////////////////////////////////////////////////////
//////////////// HA API handlers /////////////////////
//////////////////////////////////////////////////////
/**
 * Creates or updates the HA config of a resource inside one transaction.
 *
 * @param resourceId   id of the resource
 * @param resourceType type of the resource
 * @param enable       new enabled flag, or {@code null} to leave it untouched
 * @param haProvider   new provider name, or {@code null} to leave it untouched
 * @return {@code true} when the config was persisted or updated
 * @throws ServerApiException with {@code PARAM_ERROR} when the resulting config has no provider
 */
private boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final Boolean enable, final String haProvider) {
    return Transaction.execute(new TransactionCallback<Boolean>() {
        @Override
        public Boolean doInTransaction(TransactionStatus status) {
            final HAConfigVO existing = (HAConfigVO) haConfigDao.findHAResource(resourceId, resourceType);
            final boolean isNew = existing == null;
            final HAConfigVO target = isNew ? new HAConfigVO() : existing;

            if (enable != null) {
                target.setEnabled(enable);
                if (isNew) {
                    // Ownership is recorded only when a new config is created with an explicit flag.
                    target.setManagementServerId(ManagementServerNode.getManagementServerId());
                }
            }
            if (haProvider != null) {
                target.setHaProvider(haProvider);
            }
            if (isNew) {
                target.setResourceId(resourceId);
                target.setResourceType(resourceType);
            }
            if (Strings.isNullOrEmpty(target.getHaProvider())) {
                throw new ServerApiException(ApiErrorCode.PARAM_ERROR, "HAProvider is not provided for the resource, failing configuration.");
            }
            if (isNew) {
                return haConfigDao.persist(target) != null;
            }
            return haConfigDao.update(target.getId(), target);
        }
    });
}
/**
 * Assigns an HA provider to a resource.
 *
 * @param resourceId   id of the resource; must be positive
 * @param resourceType type of the resource; must not be {@code null}
 * @param haProvider   provider name (matched case-insensitively); must not be empty
 * @return {@code true} when the HA config was stored
 * @throws CloudRuntimeException when no provider is registered under the given name
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_CONFIGURE, eventDescription = "configuring HA for resource")
public boolean configureHA(final Long resourceId, final HAResource.ResourceType resourceType, final String haProvider) {
    Preconditions.checkArgument(resourceId != null && resourceId > 0L);
    Preconditions.checkArgument(resourceType != null);
    Preconditions.checkArgument(!Strings.isNullOrEmpty(haProvider));
    // Providers are registered under their lower-cased name.
    final String providerKey = haProvider.toLowerCase();
    if (!haProviderMap.containsKey(providerKey)) {
        throw new CloudRuntimeException("Given HA provider does not exist.");
    }
    validateHAProviderConfigForResource(resourceId, resourceType, haProviderMap.get(providerKey));
    return configureHA(resourceId, resourceType, null, providerKey);
}
/**
 * Sets the enabled flag of a resource's HA config to {@code true}.
 *
 * @param resourceId   id of the resource; must be positive
 * @param resourceType type of the resource; must not be {@code null}
 * @return {@code true} when the HA config was stored
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for resource")
public boolean enableHA(final Long resourceId, final HAResource.ResourceType resourceType) {
    Preconditions.checkArgument(resourceId != null && resourceId > 0L);
    Preconditions.checkArgument(resourceType != null);
    final boolean stored = configureHA(resourceId, resourceType, true, null);
    return stored;
}
/**
 * Sets the enabled flag of a resource's HA config to {@code false}; on success also
 * fires the Disabled event and purges the resource's counter.
 *
 * @param resourceId   id of the resource; must be positive
 * @param resourceType type of the resource; must not be {@code null}
 * @return {@code true} when the HA config was stored
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for resource")
public boolean disableHA(final Long resourceId, final HAResource.ResourceType resourceType) {
    Preconditions.checkArgument(resourceId != null && resourceId > 0L);
    Preconditions.checkArgument(resourceType != null);
    if (!configureHA(resourceId, resourceType, false, null)) {
        return false;
    }
    transitionHAState(HAConfig.Event.Disabled, haConfigDao.findHAResource(resourceId, resourceType));
    purgeHACounter(resourceId, resourceType);
    return true;
}
/**
 * Stores the HA-enabled detail with value {@code "true"} for a cluster.
 *
 * @param cluster the cluster to enable HA for
 * @return always {@code true}
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a cluster")
public boolean enableHA(final Cluster cluster) {
    final String enabledFlag = Boolean.toString(true);
    clusterDetailsDao.persist(cluster.getId(), HA_ENABLED_DETAIL, enabledFlag);
    return true;
}
/**
 * Stores the HA-enabled detail with value {@code "false"} for a cluster and fires the
 * Disabled event for the cluster's hosts.
 *
 * @param cluster the cluster to disable HA for
 * @return the result of disabling the cluster's hosts
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a cluster")
public boolean disableHA(final Cluster cluster) {
    final String disabledFlag = Boolean.toString(false);
    clusterDetailsDao.persist(cluster.getId(), HA_ENABLED_DETAIL, disabledFlag);
    final boolean hostsDisabled = transitionResourceStateToDisabled(cluster);
    return hostsDisabled;
}
/**
 * Stores the HA-enabled detail with value {@code "true"} for a zone.
 *
 * @param zone the zone to enable HA for
 * @return always {@code true}
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_ENABLE, eventDescription = "enabling HA for a zone")
public boolean enableHA(final DataCenter zone) {
    final String enabledFlag = Boolean.toString(true);
    dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, enabledFlag);
    return true;
}
/**
 * Stores the HA-enabled detail with value {@code "false"} for a zone and fires the
 * Disabled event for the zone's hosts.
 *
 * @param zone the zone to disable HA for
 * @return the result of disabling the zone's hosts
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_HA_RESOURCE_DISABLE, eventDescription = "disabling HA for a zone")
public boolean disableHA(final DataCenter zone) {
    final String disabledFlag = Boolean.toString(false);
    dataCenterDetailsDao.persist(zone.getId(), HA_ENABLED_DETAIL, disabledFlag);
    final boolean hostsDisabled = transitionResourceStateToDisabled(zone);
    return hostsDisabled;
}
/**
 * Lists HA configs by delegating to the HA config DAO.
 *
 * @param resourceId   resource id passed to the DAO
 * @param resourceType resource type passed to the DAO
 * @return the HA configs returned by the DAO
 */
@Override
public List<HAConfig> listHAResources(final Long resourceId, final HAResource.ResourceType resourceType) {
    final List<HAConfig> haResources = haConfigDao.listHAResource(resourceId, resourceType);
    return haResources;
}
/**
 * Lists the simple class names of the providers that serve the given resource type
 * and sub type.
 *
 * @param resourceType resource type a provider must report
 * @param entityType   resource sub type a provider must report
 * @return names of the matching providers, in provider-list order
 */
@Override
public List<String> listHAProviders(final HAResource.ResourceType resourceType, final HAResource.ResourceSubType entityType) {
    final List<String> matchingNames = new ArrayList<>();
    for (final HAProvider<HAResource> candidate : haProviders) {
        if (!candidate.resourceType().equals(resourceType)) {
            continue;
        }
        if (!candidate.resourceSubType().equals(entityType)) {
            continue;
        }
        matchingNames.add(candidate.getClass().getSimpleName());
    }
    return matchingNames;
}
/**
 * @return a new mutable list holding the HA API command classes contributed by this service
 */
@Override
public List<Class<?>> getCommands() {
    final Class<?>[] apiCommands = {
        ConfigureHAForHostCmd.class,
        EnableHAForHostCmd.class,
        EnableHAForClusterCmd.class,
        EnableHAForZoneCmd.class,
        DisableHAForHostCmd.class,
        DisableHAForClusterCmd.class,
        DisableHAForZoneCmd.class,
        ListHostHAResourcesCmd.class,
        ListHostHAProvidersCmd.class
    };
    final List<Class<?>> cmdList = new ArrayList<>();
    for (final Class<?> apiCommand : apiCommands) {
        cmdList.add(apiCommand);
    }
    return cmdList;
}
//////////////////////////////////////////////////////////////////
//////////////// Clustered Manager Listeners /////////////////////
//////////////////////////////////////////////////////////////////
/**
 * Cluster listener callback; this implementation does nothing.
 */
@Override
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
}
/**
 * Cluster listener callback; this implementation does nothing.
 */
@Override
public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
}
/**
 * Cluster listener callback; this implementation does nothing.
 */
@Override
public void onManagementNodeIsolated() {
}
///////////////////////////////////////////////////
//////////////// Manager Init /////////////////////
///////////////////////////////////////////////////
/**
 * Rebuilds the provider map from the provider list, keyed by each provider's
 * lower-cased simple class name.
 *
 * @return always {@code true}
 */
@Override
public boolean start() {
    haProviderMap.clear();
    for (final HAProvider<HAResource> provider : haProviders) {
        final String providerKey = provider.getClass().getSimpleName().toLowerCase();
        haProviderMap.put(providerKey, provider);
    }
    return true;
}
/**
 * Asks the HA config DAO to expire the ownership held by this management server.
 *
 * @return always {@code true}
 */
@Override
public boolean stop() {
    final long thisServerId = ManagementServerNode.getManagementServerId();
    haConfigDao.expireServerOwnership(thisServerId);
    return true;
}
/**
 * Creates the four HA worker pools from the global settings and registers the
 * HA poll tasks with the background poll manager.
 *
 * @param name   component name (unused here)
 * @param params component parameters (unused here)
 * @return always {@code true}
 * @throws ConfigurationException declared by the component contract; not thrown here
 */
@Override
public boolean configure(final String name, final Map<String, Object> params) throws ConfigurationException {
    healthCheckExecutor = newBoundedExecutor(MaxConcurrentHealthCheckOperations.value(), MaxPendingHealthCheckOperations.value());
    activityCheckExecutor = newBoundedExecutor(MaxConcurrentActivityCheckOperations.value(), MaxPendingActivityCheckOperations.value());
    recoveryExecutor = newBoundedExecutor(MaxConcurrentRecoveryOperations.value(), MaxPendingRecoveryOperations.value());
    fenceExecutor = newBoundedExecutor(MaxConcurrentFenceOperations.value(), MaxPendingFenceOperations.value());

    pollManager.submitTask(new HealthCheckPollTask());
    pollManager.submitTask(new ActivityCheckPollTask());
    pollManager.submitTask(new RecoveryPollTask());
    pollManager.submitTask(new FencingPollTask());
    LOG.debug("HA manager has been configured");
    return true;
}

/**
 * Builds a fixed-size thread pool backed by a fair, bounded queue. Tasks rejected
 * by a saturated pool are handled by {@link ThreadPoolExecutor.CallerRunsPolicy}.
 *
 * @param workers      number of worker threads (used as both core and maximum pool size)
 * @param pendingLimit capacity of the queue of waiting tasks
 * @return the new executor
 */
private static ExecutorService newBoundedExecutor(final int workers, final int pendingLimit) {
    return new ThreadPoolExecutor(workers, workers,
            0L, TimeUnit.MILLISECONDS,
            new ArrayBlockingQueue<Runnable>(pendingLimit, true), new ThreadPoolExecutor.CallerRunsPolicy());
}
/**
 * Sets the list of HA providers; start() indexes this list into the provider map.
 *
 * @param haProviders the providers to use
 */
public void setHaProviders(List<HAProvider<HAResource>> haProviders) {
this.haProviders = haProviders;
}
/**
 * @return the simple name of the {@link HAManager} interface
 */
@Override
public String getConfigComponentName() {
    final String componentName = HAManager.class.getSimpleName();
    return componentName;
}
/**
 * @return the eight settings that size the HA worker pools and their queues
 */
@Override
public ConfigKey<?>[] getConfigKeys() {
    final ConfigKey<?>[] haSettings = {
        MaxConcurrentHealthCheckOperations,
        MaxPendingHealthCheckOperations,
        MaxConcurrentActivityCheckOperations,
        MaxPendingActivityCheckOperations,
        MaxConcurrentRecoveryOperations,
        MaxPendingRecoveryOperations,
        MaxConcurrentFenceOperations,
        MaxPendingFenceOperations
    };
    return haSettings;
}
/////////////////////////////////////////////////
//////////////// Poll Tasks /////////////////////
/////////////////////////////////////////////////
/**
 * Poll task that walks every HA config, fires the time-based events for Suspect
 * and Degraded resources and submits a health check for resources that are
 * Available, Suspect, Degraded or Fenced.
 */
private final class HealthCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask {
@Override
protected void runInContext() {
try {
if (LOG.isTraceEnabled()) {
LOG.trace("HA health check task is running...");
}
// Iterate over a copy of the DAO result.
final List<HAConfig> haConfigList = new ArrayList<HAConfig>(haConfigDao.listAll());
for (final HAConfig haConfig : haConfigList) {
// Skip configs that name a different management server as owner.
if (!checkHAOwnership(haConfig)) {
continue;
}
// A null resource or provider means this config must not be processed in this run.
final HAResource resource = validateAndFindHAResource(haConfig);
if (resource == null) {
continue;
}
final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
if (haProvider == null) {
continue;
}
final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
// Suspect: request an activity check once the provider's MaxActivityCheckInterval allows it.
if (haConfig.getState() == HAConfig.HAState.Suspect) {
if (counter.canPerformActivityCheck((Long)(haProvider.getConfigValue(HAProviderConfig.MaxActivityCheckInterval, resource)))) {
transitionHAState(HAConfig.Event.PerformActivityCheck, haConfig);
}
}
// Degraded: request a periodic recheck once the provider's MaxDegradedWaitTimeout allows it.
if (haConfig.getState() == HAConfig.HAState.Degraded) {
if (counter.canRecheckActivity((Long)(haProvider.getConfigValue(HAProviderConfig.MaxDegradedWaitTimeout, resource)))) {
transitionHAState(HAConfig.Event.PeriodicRecheckResourceActivity, haConfig);
}
}
// The state is read again here, after the transitions above were requested.
// NOTE(review): presumably haConfig reflects a transition made above - confirm against the state machine/DAO.
switch (haConfig.getState()) {
case Available:
case Suspect:
case Degraded:
case Fenced:
final HealthCheckTask task = ComponentContext.inject(new HealthCheckTask(resource, haProvider, haConfig,
HAProviderConfig.HealthCheckTimeout, healthCheckExecutor));
healthCheckExecutor.submit(task);
break;
default:
break;
}
}
} catch (Throwable t) {
// Any failure is logged here and not rethrown.
LOG.error("Error trying to perform health checks in HA manager", t);
}
}
}
/**
 * Poll task that submits an activity check for every owned, valid HA resource
 * whose HA state is Checking.
 */
private final class ActivityCheckPollTask extends ManagedContextRunnable implements BackgroundPollTask {
    @Override
    protected void runInContext() {
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("HA activity check task is running...");
            }
            // Iterate over a copy of the DAO result.
            for (final HAConfig haConfig : new ArrayList<HAConfig>(haConfigDao.listAll())) {
                if (!checkHAOwnership(haConfig)) {
                    continue;
                }
                final HAResource resource = validateAndFindHAResource(haConfig);
                if (resource == null) {
                    continue;
                }
                final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
                if (haProvider == null) {
                    continue;
                }
                if (haConfig.getState() != HAConfig.HAState.Checking) {
                    continue;
                }
                final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
                final long suspectSince = counter.getSuspectTimeStamp();
                final ActivityCheckTask job = ComponentContext.inject(new ActivityCheckTask(resource, haProvider, haConfig,
                        HAProviderConfig.ActivityCheckTimeout, activityCheckExecutor, suspectSince));
                activityCheckExecutor.submit(job);
            }
        } catch (Throwable t) {
            // Any failure is logged here and not rethrown.
            LOG.error("Error trying to perform activity checks in HA manager", t);
        }
    }
}
/**
 * Poll task that submits recovery operations for resources in the Recovering state
 * and ends the wait period of resources in the Recovered state.
 */
private final class RecoveryPollTask extends ManagedContextRunnable implements BackgroundPollTask {
@Override
protected void runInContext() {
try {
if (LOG.isTraceEnabled()) {
LOG.trace("HA recovery task is running...");
}
// Iterate over a copy of the DAO result.
final List<HAConfig> haConfigList = new ArrayList<HAConfig>(haConfigDao.listAll());
for (final HAConfig haConfig : haConfigList) {
// Skip configs that name a different management server as owner.
if (!checkHAOwnership(haConfig)) {
continue;
}
// A null resource or provider means this config must not be processed in this run.
final HAResource resource = validateAndFindHAResource(haConfig);
if (resource == null) {
continue;
}
final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
if (haProvider == null) {
continue;
}
final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
if (haConfig.getState() == HAConfig.HAState.Recovering) {
if (counter.canAttemptRecovery()) {
// Stop submitting once the provider's MaxRecoveryAttempts is reached and signal that instead.
if (counter.getRecoveryCounter() >= (Long)(haProvider.getConfigValue(HAProviderConfig.MaxRecoveryAttempts, resource))) {
transitionHAState(HAConfig.Event.RecoveryOperationThresholdExceeded, haConfig);
continue;
}
final RecoveryTask task = ComponentContext.inject(new RecoveryTask(resource, haProvider, haConfig,
HAProviderConfig.RecoveryTimeout, recoveryExecutor));
final Future<Boolean> recoveryFuture = recoveryExecutor.submit(task);
// Hand the future to the counter and count this attempt.
counter.setRecoveryFuture(recoveryFuture);
counter.incrRecoveryCounter();
}
}
if (haConfig.getState() == HAConfig.HAState.Recovered) {
// markRecoveryStarted() records the time only on its first call, so repeated polls keep the original start.
counter.markRecoveryStarted();
if (counter.canExitRecovery((Long)(haProvider.getConfigValue(HAProviderConfig.RecoveryWaitTimeout, resource)))) {
transitionHAState(HAConfig.Event.RecoveryWaitPeriodTimeout, haConfig);
counter.markRecoveryCompleted();
}
}
}
} catch (Throwable t) {
// Any failure is logged here and not rethrown.
LOG.error("Error trying to perform recovery operation in HA manager", t);
}
}
}
/**
 * Poll task that submits a fence operation for every owned, valid HA resource in the
 * Fencing state, provided the counter reports the last fencing as completed.
 */
private final class FencingPollTask extends ManagedContextRunnable implements BackgroundPollTask {
    @Override
    protected void runInContext() {
        try {
            if (LOG.isTraceEnabled()) {
                LOG.trace("HA fencing task is running...");
            }
            // Iterate over a copy of the DAO result.
            for (final HAConfig haConfig : new ArrayList<HAConfig>(haConfigDao.listAll())) {
                if (!checkHAOwnership(haConfig)) {
                    continue;
                }
                final HAResource resource = validateAndFindHAResource(haConfig);
                if (resource == null) {
                    continue;
                }
                final HAProvider<HAResource> haProvider = validateAndFindHAProvider(haConfig, resource);
                if (haProvider == null) {
                    continue;
                }
                final HAResourceCounter counter = getHACounter(haConfig.getResourceId(), haConfig.getResourceType());
                final boolean shouldFence = counter.lastFencingCompleted()
                        && haConfig.getState() == HAConfig.HAState.Fencing;
                if (!shouldFence) {
                    continue;
                }
                final FenceTask task = ComponentContext.inject(new FenceTask(resource, haProvider, haConfig,
                        HAProviderConfig.FenceTimeout, fenceExecutor));
                // Hand the future to the counter.
                final Future<Boolean> pendingFence = fenceExecutor.submit(task);
                counter.setFenceFuture(pendingFence);
            }
        } catch (Throwable t) {
            // Any failure is logged here and not rethrown.
            LOG.error("Error trying to perform fencing operation in HA manager", t);
        }
    }
}
}

View File

@ -0,0 +1,128 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
/**
 * Per-resource bookkeeping for HA: activity-check and recovery counters, the
 * timestamps of HA milestones and the futures of in-flight recovery/fence work.
 */
public final class HAResourceCounter {
// Total activity checks, and how many of them were failures (see incrActivityCounter).
private AtomicLong activityCheckCounter = new AtomicLong(0);
private AtomicLong activityCheckFailureCounter = new AtomicLong(0);
// Number of recovery operations counted so far (see incrRecoveryCounter).
private AtomicLong recoveryOperationCounter = new AtomicLong(0);
// Timestamps are System.currentTimeMillis() values; null means "not recorded".
// Set by markResourceSuspected()/getSuspectTimeStamp(), cleared by resetSuspectTimestamp().
private Long firstHealthCheckFailureTimestamp;
// Set by incrActivityCounter().
private Long lastActivityCheckTimestamp;
// Set by markResourceDegraded().
private Long degradedTimestamp;
// Set by markRecoveryStarted(), cleared by resetRecoveryCounter().
private Long recoverTimestamp;
// Cleared by resetRecoveryCounter().
private Future<Boolean> recoveryFuture;
private Future<Boolean> fenceFuture;
/**
 * @return the current value of the activity check counter
 */
public long getActivityCheckCounter() {
    final long checks = activityCheckCounter.get();
    return checks;
}
/**
 * @return the current value of the recovery operation counter
 */
public long getRecoveryCounter() {
    final long recoveries = recoveryOperationCounter.get();
    return recoveries;
}
public synchronized void incrActivityCounter(final boolean isFailure) {
lastActivityCheckTimestamp = System.currentTimeMillis();
activityCheckCounter.incrementAndGet();
if (isFailure) {
activityCheckFailureCounter.incrementAndGet();
}
}
public synchronized void incrRecoveryCounter() {
recoveryOperationCounter.incrementAndGet();
}
public synchronized void resetActivityCounter() {
activityCheckCounter.set(0);
activityCheckFailureCounter.set(0);
}
public synchronized void resetRecoveryCounter() {
recoverTimestamp = null;
recoveryFuture = null;
recoveryOperationCounter.set(0);
}
public synchronized void resetSuspectTimestamp() {
firstHealthCheckFailureTimestamp = null;
}
public boolean hasActivityThresholdExceeded(final double failureRatio) {
return activityCheckFailureCounter.get() > (activityCheckCounter.get() * failureRatio);
}
public boolean canPerformActivityCheck(final Long activityCheckInterval) {
return lastActivityCheckTimestamp == null || (System.currentTimeMillis() - lastActivityCheckTimestamp) > (activityCheckInterval * 1000);
}
public boolean canRecheckActivity(final Long maxDegradedPeriod) {
return degradedTimestamp == null || (System.currentTimeMillis() - degradedTimestamp) > (maxDegradedPeriod * 1000);
}
public boolean canExitRecovery(final Long maxRecoveryWaitPeriod) {
return recoverTimestamp != null && (System.currentTimeMillis() - recoverTimestamp) > (maxRecoveryWaitPeriod * 1000);
}
public long getSuspectTimeStamp() {
if (firstHealthCheckFailureTimestamp == null) {
firstHealthCheckFailureTimestamp = System.currentTimeMillis();
}
return firstHealthCheckFailureTimestamp;
}
public synchronized void markResourceSuspected() {
firstHealthCheckFailureTimestamp = System.currentTimeMillis();
}
public synchronized void markResourceDegraded() {
degradedTimestamp = System.currentTimeMillis();
}
public synchronized void markRecoveryStarted() {
if (recoverTimestamp == null) {
recoverTimestamp = System.currentTimeMillis();
}
}
public synchronized void markRecoveryCompleted() {
recoverTimestamp = null;
recoveryFuture = null;
}
public void setRecoveryFuture(final Future<Boolean> future) {
recoveryFuture = future;
}
public boolean canAttemptRecovery() {
return recoveryFuture == null || recoveryFuture.isDone();
}
public void setFenceFuture(final Future<Boolean> future) {
fenceFuture = future;
}
public boolean lastFencingCompleted() {
return fenceFuture == null || fenceFuture.isDone();
}
}

View File

@ -0,0 +1,35 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
import org.joda.time.DateTime;
import org.apache.cloudstack.ha.HAResource;
import com.cloud.utils.component.Adapter;
/**
 * Adapter for the deeper "is this resource still doing work?" investigation.
 * It is meant to be consulted once a health check has already failed.
 *
 * @param <R> the kind of HA resource being investigated
 */
public interface ActivityCheckerInterface<R extends HAResource> extends Adapter {

    /**
     * Investigates the activity of a resource.
     *
     * @param r the resource to investigate
     * @param t reference instant for the investigation
     * @return {@code true} when the resource is considered active
     * @throws HACheckerException when the check itself could not be carried out
     */
    boolean isActive(R r, DateTime t) throws HACheckerException;
}

View File

@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
/**
 * Checked exception signalling that an HA health or activity check could not be performed.
 */
public class HACheckerException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failed check
     * @param cause   underlying exception, may be {@code null}
     */
    public HACheckerException(final String message, final Exception cause) {
        super(message, cause);
    }
}

View File

@ -0,0 +1,29 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
/**
 * Checked exception signalling that a fence operation on an HA resource failed.
 */
public class HAFenceException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failed fence operation
     * @param cause   underlying exception, may be {@code null}
     */
    public HAFenceException(final String message, final Exception cause) {
        super(message, cause);
    }
}

View File

@ -0,0 +1,65 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
import com.cloud.utils.component.Adapter;
import org.apache.cloudstack.ha.HAConfig;
import org.joda.time.DateTime;
import org.apache.cloudstack.ha.HAResource;
/**
 * Contract implemented by an HA provider for one kind of resource: eligibility and
 * health/activity checks, the recover and fence operations, alerting, and lookup of
 * provider-specific configuration values.
 *
 * @param <R> the kind of HA resource the provider manages
 */
public interface HAProvider<R extends HAResource> extends Adapter {

    /** Keys accepted by {@link #getConfigValue(HAProviderConfig, HAResource)}. */
    enum HAProviderConfig {
        HealthCheckTimeout,
        ActivityCheckTimeout,
        RecoveryTimeout,
        FenceTimeout,
        ActivityCheckFailureRatio,
        MaxActivityChecks,
        MaxRecoveryAttempts,
        MaxActivityCheckInterval,
        MaxDegradedWaitTimeout,
        RecoveryWaitTimeout
    }

    /** @return the resource type this provider handles */
    HAResource.ResourceType resourceType();

    /** @return the resource sub-type this provider handles */
    HAResource.ResourceSubType resourceSubType();

    /** @return {@code true} when the given resource is disabled */
    boolean isDisabled(R r);

    /** @return {@code true} when the given resource is in maintenance mode */
    boolean isInMaintenanceMode(R r);

    /** @return {@code true} when the given resource is eligible for HA handling */
    boolean isEligible(R r);

    /**
     * @return {@code true} when the resource passes the health check
     * @throws HACheckerException when the check could not be carried out
     */
    boolean isHealthy(R r) throws HACheckerException;

    /**
     * @param r         the resource to inspect
     * @param afterThis reference instant for the activity check
     * @return {@code true} when the resource shows activity
     * @throws HACheckerException when the check could not be carried out
     */
    boolean hasActivity(R r, DateTime afterThis) throws HACheckerException;

    /**
     * Attempts to recover the resource.
     *
     * @return {@code true} when the recovery operation succeeded
     * @throws HARecoveryException when the recovery operation failed
     */
    boolean recover(R r) throws HARecoveryException;

    /**
     * Attempts to fence the resource.
     *
     * @return {@code true} when the fence operation succeeded
     * @throws HAFenceException when the fence operation failed
     */
    boolean fence(R r) throws HAFenceException;

    /** Post-fence hook for the given resource. */
    void setFenced(R r);

    /**
     * Raises an alert about an HA operation on the resource.
     *
     * @param r         the affected resource
     * @param nextState the HA state the alert refers to
     */
    void sendAlert(R r, HAConfig.HAState nextState);

    /**
     * Looks up a provider configuration value for a resource.
     *
     * @param name the configuration key
     * @param r    the resource the value applies to
     * @return the configured value; the concrete type depends on the key
     */
    Object getConfigValue(HAProviderConfig name, R r);
}

View File

@ -0,0 +1,28 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
/**
 * Checked exception signalling that a recovery operation on an HA resource failed.
 */
public class HARecoveryException extends Exception {

    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failed recovery operation
     * @param cause   underlying exception, may be {@code null}
     */
    public HARecoveryException(final String message, final Exception cause) {
        super(message, cause);
    }
}

View File

@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
import org.apache.cloudstack.ha.HAResource;
import com.cloud.utils.component.Adapter;
/**
 * Adapter for the quick health probe of a resource, for example pinging a host or
 * checking the health of its agent.
 *
 * @param <R> the kind of HA resource being probed
 */
public interface HealthCheckerInterface<R extends HAResource> extends Adapter {

    /**
     * Probes the health of a resource.
     *
     * @param r the resource to probe
     * @return {@code true} when the resource is considered healthy
     */
    boolean isHealthy(R r);
}

View File

@ -0,0 +1,23 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider;
import com.cloud.host.Host;
/**
 * Specialisation of {@link HAProvider} whose resource type is {@link Host}.
 * It declares no members of its own.
 */
public interface HostHAProvider extends HAProvider<Host> {
}

View File

@ -0,0 +1,105 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.provider.host;
import com.cloud.agent.AgentManager;
import com.cloud.alert.AlertManager;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.Status.Event;
import com.cloud.resource.ResourceManager;
import com.cloud.resource.ResourceState;
import com.cloud.utils.component.AdapterBase;
import com.cloud.utils.fsm.NoTransitionException;
import org.apache.cloudstack.alert.AlertService;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.log4j.Logger;
import javax.inject.Inject;
/**
 * Shared base for host HA providers. It answers the resource-type, disabled and
 * maintenance queries straight from the {@link Host}, implements the post-fence
 * handling, and sends the HA alerts; everything else is left to subclasses.
 */
public abstract class HAAbstractHostProvider extends AdapterBase implements HAProvider<Host> {

    private static final Logger LOG = Logger.getLogger(HAAbstractHostProvider.class);

    @Inject
    private AlertManager alertManager;

    @Inject
    protected AgentManager agentManager;

    @Inject
    protected ResourceManager resourceManager;

    @Inject
    protected HighAvailabilityManager oldHighAvailabilityManager;

    /** @return always {@link HAResource.ResourceType#Host} */
    @Override
    public HAResource.ResourceType resourceType() {
        return HAResource.ResourceType.Host;
    }

    /** @return {@link HAResource.ResourceSubType#Unknown} in this base class */
    public HAResource.ResourceSubType resourceSubType() {
        return HAResource.ResourceSubType.Unknown;
    }

    /** @return the host's own disabled flag */
    @Override
    public boolean isDisabled(final Host host) {
        return host.isDisabled();
    }

    /** @return whether the host reports being in one of its maintenance states */
    @Override
    public boolean isInMaintenanceMode(final Host host) {
        return host.isInMaintenanceStates();
    }

    /**
     * Post-fence handling for a host that is not already {@code Down}: disconnect it
     * without investigation, schedule a restart of its VMs, then move it towards
     * maintenance. The two steps are attempted independently; a failure of the first is
     * logged and does not prevent the second.
     *
     * @param r the fenced host
     */
    @Override
    public void setFenced(final Host r) {
        if (r.getState() == Status.Down) {
            // Nothing to do for a host that is already marked down.
            return;
        }

        try {
            LOG.debug("Trying to disconnect the host without investigation and scheduling HA for the VMs on host id=" + r.getId());
            agentManager.disconnectWithoutInvestigation(r.getId(), Event.HostDown);
            oldHighAvailabilityManager.scheduleRestartForVmsOnHost((HostVO)r, true);
        } catch (Exception e) {
            LOG.error("Failed to disconnect host and schedule HA restart of VMs after fencing the host: ", e);
        }

        try {
            resourceManager.resourceStateTransitTo(r, ResourceState.Event.InternalEnterMaintenance, ManagementServerNode.getManagementServerId());
        } catch (NoTransitionException e) {
            LOG.error("Failed to put host in maintenance mode after host-ha fencing and scheduling VM-HA: ", e);
        }
    }

    /**
     * Sends an HA-action alert for the host. Fencing and Recovering get specific
     * subject/body texts; any other state falls back to a generic message used for both.
     *
     * @param host      the affected host
     * @param nextState the HA state the alert refers to
     */
    @Override
    public void sendAlert(final Host host, final HAConfig.HAState nextState) {
        final String subject;
        final String body;
        if (nextState == HAConfig.HAState.Fencing) {
            subject = String.format("HA Fencing of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId());
            body = String.format("HA Fencing has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId());
        } else if (nextState == HAConfig.HAState.Recovering) {
            subject = String.format("HA Recovery of host id=%d, in dc id=%d performed", host.getId(), host.getDataCenterId());
            body = String.format("HA Recovery has been performed for host id=%d, uuid=%s in datacenter id=%d", host.getId(), host.getUuid(), host.getDataCenterId());
        } else {
            subject = "HA operation performed for host";
            body = subject;
        }
        alertManager.sendAlert(AlertService.AlertType.ALERT_TYPE_HA_ACTION, host.getDataCenterId(), host.getPodId(), subject, body);
    }
}

View File

@ -0,0 +1,82 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.task;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.HAResourceCounter;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HAProvider.HAProviderConfig;
import org.apache.log4j.Logger;
import javax.inject.Inject;
import org.joda.time.DateTime;
import java.util.concurrent.ExecutorService;
/**
 * HA task that asks the provider whether a resource has shown activity since it was
 * disconnected, and turns the accumulated check results into an HA state transition.
 */
public class ActivityCheckTask extends BaseHATask {

    public static final Logger LOG = Logger.getLogger(ActivityCheckTask.class);

    @Inject
    private HAManager haManager;

    private final long disconnectTime;

    /**
     * @param resource         the resource to check
     * @param haProvider       provider that performs the check
     * @param haConfig         HA configuration of the resource
     * @param haProviderConfig configuration key handed to the base task
     * @param executor         executor handed to the base task
     * @param disconnectTime   disconnect instant, passed to {@code new DateTime(long)}
     */
    public ActivityCheckTask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig,
            final ExecutorService executor, final long disconnectTime) {
        super(resource, haProvider, haConfig, haProviderConfig, executor);
        this.disconnectTime = disconnectTime;
    }

    /** Delegates to the provider's activity check, using the disconnect time as reference instant. */
    public boolean performAction() throws HACheckerException {
        final DateTime disconnectedAt = new DateTime(disconnectTime);
        return getHaProvider().hasActivity(getResource(), disconnectedAt);
    }

    /**
     * Folds one check result into the resource counter and fires the matching HA event.
     * <ul>
     * <li>A checker exception marks the resource ineligible and resets the counter.</li>
     * <li>While fewer than {@code MaxActivityChecks} samples exist, only
     *     {@code TooFewActivityCheckSamples} is fired and the counter keeps accumulating.</li>
     * <li>Otherwise the failure ratio decides between the over- and under-threshold events,
     *     and the counter is reset for the next round.</li>
     * </ul>
     *
     * @param result outcome of the activity check
     * @param t      throwable reported by the base task, or {@code null}
     */
    public void processResult(boolean result, Throwable t) {
        final HAConfig config = getHaConfig();
        final HAProvider<HAResource> provider = getHaProvider();
        final HAResource target = getResource();
        final HAResourceCounter counter = haManager.getHACounter(config.getResourceId(), config.getResourceType());

        // instanceof is already false for null, so no separate null check is needed.
        if (t instanceof HACheckerException) {
            haManager.transitionHAState(HAConfig.Event.Ineligible, config);
            counter.resetActivityCounter();
            return;
        }

        counter.incrActivityCounter(!result);

        final long requiredSamples = (Long)provider.getConfigValue(HAProviderConfig.MaxActivityChecks, target);
        if (counter.getActivityCheckCounter() < requiredSamples) {
            haManager.transitionHAState(HAConfig.Event.TooFewActivityCheckSamples, config);
            return;
        }

        final double allowedFailureRatio = (Double)provider.getConfigValue(HAProviderConfig.ActivityCheckFailureRatio, target);
        final boolean overThreshold = counter.hasActivityThresholdExceeded(allowedFailureRatio);
        if (!overThreshold) {
            haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureUnderThresholdRatio, config);
            counter.markResourceDegraded();
        } else {
            haManager.transitionHAState(HAConfig.Event.ActivityCheckFailureOverThresholdRatio, config);
        }
        counter.resetActivityCounter();
    }
}

View File

@ -0,0 +1,102 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.task;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAFenceException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
import org.apache.log4j.Logger;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
/**
 * Template for HA tasks. {@link #call()} runs {@link #performAction()} on the supplied
 * executor, waits for it (bounded by the provider-configured timeout when one is set),
 * and then hands the outcome to {@link #processResult(boolean, Throwable)}.
 */
public abstract class BaseHATask implements Callable<Boolean> {
    public static final Logger LOG = Logger.getLogger(BaseHATask.class);

    private final HAResource resource;
    private final HAProvider<HAResource> haProvider;
    private final HAConfig haConfig;
    private final ExecutorService executor;
    // Timeout in seconds for performAction(); null means wait without a bound.
    private final Long timeout;

    /**
     * @param resource         the resource the task operates on
     * @param haProvider       provider that carries out the action
     * @param haConfig         HA configuration of the resource
     * @param haProviderConfig key of the timeout value looked up from the provider
     * @param executor         executor on which {@link #performAction()} is run
     */
    public BaseHATask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig, final HAProvider.HAProviderConfig haProviderConfig,
            final ExecutorService executor) {
        this.resource = resource;
        this.haProvider = haProvider;
        this.haConfig = haConfig;
        this.executor = executor;
        this.timeout = (Long)haProvider.getConfigValue(haProviderConfig, resource);
    }

    public HAProvider<HAResource> getHaProvider() {
        return haProvider;
    }

    public HAConfig getHaConfig() {
        return haConfig;
    }

    public HAResource getResource() {
        return resource;
    }

    /** @return the simple class name of the concrete task, used in log messages */
    public String getTaskType() {
        return this.getClass().getSimpleName();
    }

    /**
     * The actual operation of the task; the default does nothing and reports success.
     *
     * @return {@code true} when the operation succeeded
     */
    public boolean performAction() throws HACheckerException, HAFenceException, HARecoveryException {
        return true;
    }

    /**
     * Called once per {@link #call()} with the outcome of {@link #performAction()}.
     *
     * @param result {@code false} when the action returned false, threw, timed out or the wait was interrupted
     * @param e      cause of a failed execution, or {@code null} (also {@code null} on timeout)
     */
    public abstract void processResult(boolean result, Throwable e);

    /**
     * Runs the action on the executor and processes its outcome.
     *
     * <p>If the waiting thread is interrupted, the interrupt status is restored before
     * this method returns so that the owner of the thread can still observe it; it is
     * restored only after {@link #processResult(boolean, Throwable)} has run so that the
     * result handling itself is not disturbed.
     *
     * @return the result of {@link #performAction()}, or {@code false} on failure, timeout or interruption
     */
    @Override
    public Boolean call() {
        final Future<Boolean> future = executor.submit(new Callable<Boolean>() {
            @Override
            public Boolean call() throws HACheckerException, HAFenceException, HARecoveryException {
                return performAction();
            }
        });

        boolean result = false;
        boolean interrupted = false;
        Throwable throwable = null;
        try {
            if (timeout == null) {
                result = future.get();
            } else {
                result = future.get(timeout, TimeUnit.SECONDS);
            }
        } catch (InterruptedException e) {
            interrupted = true;
            // An InterruptedException normally carries no cause, so log the exception itself.
            LOG.warn("Exception occurred while running " + getTaskType() + " on a resource: " + e.getMessage(), e);
            throwable = e.getCause();
        } catch (ExecutionException e) {
            LOG.warn("Exception occurred while running " + getTaskType() + " on a resource: " + e.getMessage(), e.getCause());
            throwable = e.getCause();
        } catch (TimeoutException e) {
            // NOTE(review): the submitted action is not cancelled here and may keep running on the executor.
            LOG.trace(getTaskType() + " operation timed out for resource id:" + resource.getId());
        }

        try {
            processResult(result, throwable);
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
        return result;
    }
}

View File

@ -0,0 +1,55 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.task;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.HAResourceCounter;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAFenceException;
import org.apache.cloudstack.ha.provider.HAProvider;
import javax.inject.Inject;
import java.util.concurrent.ExecutorService;
/**
 * HA task that asks the provider to fence a resource and, on success, records the
 * fenced state.
 */
public class FenceTask extends BaseHATask {

    @Inject
    private HAManager haManager;

    /**
     * @param resource         the resource to fence
     * @param haProvider       provider that performs the fence operation
     * @param haConfig         HA configuration of the resource
     * @param haProviderConfig configuration key handed to the base task
     * @param executor         executor handed to the base task
     */
    public FenceTask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig,
            final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) {
        super(resource, haProvider, haConfig, haProviderConfig, executor);
    }

    /** Delegates to the provider's fence operation. */
    public boolean performAction() throws HACheckerException, HAFenceException {
        return getHaProvider().fence(getResource());
    }

    /**
     * On success resets the recovery counter, fires the {@code Fenced} event and lets the
     * provider run its post-fence hook. The fencing alert is sent whether or not the
     * operation succeeded.
     *
     * @param result outcome of the fence operation
     * @param e      throwable reported by the base task, or {@code null}; not inspected here
     */
    public void processResult(boolean result, Throwable e) {
        final HAConfig config = getHaConfig();
        final HAResourceCounter counter = haManager.getHACounter(config.getResourceId(), config.getResourceType());
        final HAProvider<HAResource> provider = getHaProvider();
        final HAResource target = getResource();

        if (result) {
            counter.resetRecoveryCounter();
            haManager.transitionHAState(HAConfig.Event.Fenced, config);
            provider.setFenced(target);
        }
        provider.sendAlert(target, HAConfig.HAState.Fencing);
    }
}

View File

@ -0,0 +1,63 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.task;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.HAResourceCounter;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.log4j.Logger;
import javax.inject.Inject;
import java.util.concurrent.ExecutorService;
/**
 * HA task that runs the provider's health check on a resource and maps the outcome to
 * the {@code HealthCheckPassed} / {@code HealthCheckFailed} events.
 */
public class HealthCheckTask extends BaseHATask {

    @Inject
    private HAManager haManager;

    public static final Logger LOG = Logger.getLogger(HealthCheckTask.class);

    /**
     * @param resource         the resource to check
     * @param haProvider       provider that performs the health check
     * @param haConfig         HA configuration of the resource
     * @param haProviderConfig configuration key handed to the base task
     * @param executor         executor handed to the base task
     */
    public HealthCheckTask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig,
            final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) {
        super(resource, haProvider, haConfig, haProviderConfig, executor);
    }

    /** Delegates to the provider's health check. */
    public boolean performAction() throws HACheckerException {
        return getHaProvider().isHealthy(getResource());
    }

    /**
     * A failed check fires {@code HealthCheckFailed} and stamps the resource as suspected.
     * A passed check fires {@code HealthCheckPassed}, disables HA when the configuration
     * still reads {@code Fenced} after that transition call, and clears the suspect,
     * activity and recovery bookkeeping.
     *
     * @param result outcome of the health check
     * @param e      throwable reported by the base task, or {@code null}; not inspected here
     */
    public void processResult(boolean result, Throwable e) {
        final HAConfig config = getHaConfig();
        final HAResourceCounter counter = haManager.getHACounter(config.getResourceId(), config.getResourceType());

        if (!result) {
            haManager.transitionHAState(HAConfig.Event.HealthCheckFailed, config);
            counter.markResourceSuspected();
            return;
        }

        haManager.transitionHAState(HAConfig.Event.HealthCheckPassed, config);
        if (config.getState() == HAConfig.HAState.Fenced) {
            haManager.disableHA(config.getResourceId(), config.getResourceType());
        }
        counter.resetSuspectTimestamp();
        counter.resetActivityCounter();
        counter.resetRecoveryCounter();
    }
}

View File

@ -0,0 +1,51 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.ha.task;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.ha.HAManager;
import org.apache.cloudstack.ha.HAResource;
import org.apache.cloudstack.ha.provider.HACheckerException;
import org.apache.cloudstack.ha.provider.HAProvider;
import org.apache.cloudstack.ha.provider.HARecoveryException;
import javax.inject.Inject;
import java.util.concurrent.ExecutorService;
/**
 * HA task that asks the provider to recover a resource and fires the {@code Recovered}
 * event when that succeeds.
 */
public class RecoveryTask extends BaseHATask {

    @Inject
    private HAManager haManager;

    /**
     * @param resource         the resource to recover
     * @param haProvider       provider that performs the recovery
     * @param haConfig         HA configuration of the resource
     * @param haProviderConfig configuration key handed to the base task
     * @param executor         executor handed to the base task
     */
    public RecoveryTask(final HAResource resource, final HAProvider<HAResource> haProvider, final HAConfig haConfig,
            final HAProvider.HAProviderConfig haProviderConfig, final ExecutorService executor) {
        super(resource, haProvider, haConfig, haProviderConfig, executor);
    }

    /** Delegates to the provider's recover operation. */
    public boolean performAction() throws HACheckerException, HARecoveryException {
        return getHaProvider().recover(getResource());
    }

    /**
     * Fires {@code Recovered} on success. The recovery alert is sent whether or not the
     * operation succeeded.
     *
     * @param result outcome of the recovery operation
     * @param e      throwable reported by the base task, or {@code null}; not inspected here
     */
    public void processResult(boolean result, Throwable e) {
        final HAConfig config = getHaConfig();
        final HAProvider<HAResource> provider = getHaProvider();
        if (result) {
            haManager.transitionHAState(HAConfig.Event.Recovered, config);
        }
        provider.sendAlert(getResource(), HAConfig.HAState.Recovering);
    }
}

View File

@ -138,3 +138,106 @@ CREATE TABLE IF NOT EXISTS `cloud`.`crl` (
KEY (`serial`),
UNIQUE KEY (`serial`, `cn`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Host HA feature
-- ha_config holds the HA settings and current HA state of a resource, keyed by
-- (resource_id, resource_type), a pair the unique key below keeps from repeating.
-- New rows default to enabled = 0 and ha_state = 'Disabled'. Per the column comments,
-- update_count is an increment-only counter used for atomic ha_state updates and
-- mgmt_server_id names the management server responsible for the resource.
-- IF NOT EXISTS makes the statement safe to run again on a schema that already has the table.
CREATE TABLE IF NOT EXISTS `cloud`.`ha_config` (
`id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
`resource_id` bigint(20) unsigned DEFAULT NULL COMMENT 'id of the resource',
`resource_type` varchar(255) NOT NULL COMMENT 'the type of the resource',
`enabled` int(1) unsigned DEFAULT '0' COMMENT 'is HA enabled for the resource',
`ha_state` varchar(255) DEFAULT 'Disabled' COMMENT 'HA state',
`provider` varchar(255) DEFAULT NULL COMMENT 'HA provider',
`update_count` bigint(20) unsigned NOT NULL DEFAULT '0' COMMENT 'state based incr-only counter for atomic ha_state updates',
`update_time` datetime COMMENT 'last ha_state update datetime',
`mgmt_server_id` bigint(20) unsigned DEFAULT NULL COMMENT 'management server id that is responsible for the HA for the resource',
PRIMARY KEY (`id`),
KEY `i_ha_config__enabled` (`enabled`),
KEY `i_ha_config__ha_state` (`ha_state`),
KEY `i_ha_config__mgmt_server_id` (`mgmt_server_id`),
UNIQUE KEY (`resource_id`, `resource_type`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Removes the 'outofbandmanagement.sync.interval' row from the configuration table.
-- NOTE(review): presumably this setting is no longer read after this change - confirm against the out-of-band management code
DELETE from `cloud`.`configuration` where name='outofbandmanagement.sync.interval';
-- Host HA changes:
-- Recreates host_view (dropped first so the definition is replaced as a whole) so that
-- every host row also exposes ha_enabled, ha_state and ha_provider. Those three columns
-- come from the ha_config row whose resource_id equals the host id and whose
-- resource_type is 'Host'. Every join is a left join, so a host without a matching row
-- in a joined table still appears, with NULL in the columns taken from that table.
DROP VIEW IF EXISTS `cloud`.`host_view`;
CREATE VIEW `cloud`.`host_view` AS
select
host.id,
host.uuid,
host.name,
host.status,
host.disconnected,
host.type,
host.private_ip_address,
host.version,
host.hypervisor_type,
host.hypervisor_version,
host.capabilities,
host.last_ping,
host.created,
host.removed,
host.resource_state,
host.mgmt_server_id,
host.cpu_sockets,
host.cpus,
host.speed,
host.ram,
cluster.id cluster_id,
cluster.uuid cluster_uuid,
cluster.name cluster_name,
cluster.cluster_type,
data_center.id data_center_id,
data_center.uuid data_center_uuid,
data_center.name data_center_name,
data_center.networktype data_center_type,
host_pod_ref.id pod_id,
host_pod_ref.uuid pod_uuid,
host_pod_ref.name pod_name,
host_tags.tag,
guest_os_category.id guest_os_category_id,
guest_os_category.uuid guest_os_category_uuid,
guest_os_category.name guest_os_category_name,
mem_caps.used_capacity memory_used_capacity,
mem_caps.reserved_capacity memory_reserved_capacity,
cpu_caps.used_capacity cpu_used_capacity,
cpu_caps.reserved_capacity cpu_reserved_capacity,
async_job.id job_id,
async_job.uuid job_uuid,
async_job.job_status job_status,
async_job.account_id job_account_id,
oobm.enabled AS `oobm_enabled`,
oobm.power_state AS `oobm_power_state`,
ha_config.enabled AS `ha_enabled`,
ha_config.ha_state AS `ha_state`,
ha_config.provider AS `ha_provider`
from
`cloud`.`host`
left join
`cloud`.`cluster` ON host.cluster_id = cluster.id
left join
`cloud`.`data_center` ON host.data_center_id = data_center.id
left join
`cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id
left join
`cloud`.`host_details` ON host.id = host_details.host_id
and host_details.name = 'guest.os.category.id'
left join
`cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED)
left join
`cloud`.`host_tags` ON host_tags.host_id = host.id
left join
`cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id
and mem_caps.capacity_type = 0
left join
`cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id
and cpu_caps.capacity_type = 1
left join
`cloud`.`async_job` ON async_job.instance_id = host.id
and async_job.instance_type = 'Host'
and async_job.job_status = 0
left join
`cloud`.`oobm` ON oobm.host_id = host.id
left join
`cloud`.`ha_config` ON ha_config.resource_id=host.id
and ha_config.resource_type='Host';

View File

@ -272,79 +272,3 @@ CREATE VIEW `cloud`.`user_view` AS
and async_job.instance_type = 'User'
and async_job.job_status = 0;
-- Out-of-band management
-- Recreate host_view: host rows flattened together with their cluster, zone,
-- pod, tag, guest OS category, capacity usage, async job and out-of-band
-- management columns. Every join is a LEFT JOIN, so a host still appears when
-- a related row is missing; a host matching several rows of a joined table
-- (for example several host_tags rows) appears once per match.
DROP VIEW IF EXISTS `cloud`.`host_view`;
CREATE VIEW `cloud`.`host_view` AS
select
-- columns taken directly from the host table
host.id,
host.uuid,
host.name,
host.status,
host.disconnected,
host.type,
host.private_ip_address,
host.version,
host.hypervisor_type,
host.hypervisor_version,
host.capabilities,
host.last_ping,
host.created,
host.removed,
host.resource_state,
host.mgmt_server_id,
host.cpu_sockets,
host.cpus,
host.speed,
host.ram,
-- owning cluster
cluster.id cluster_id,
cluster.uuid cluster_uuid,
cluster.name cluster_name,
cluster.cluster_type,
-- owning zone (data_center)
data_center.id data_center_id,
data_center.uuid data_center_uuid,
data_center.name data_center_name,
data_center.networktype data_center_type,
-- owning pod
host_pod_ref.id pod_id,
host_pod_ref.uuid pod_uuid,
host_pod_ref.name pod_name,
host_tags.tag,
-- guest OS category, resolved through the 'guest.os.category.id' host detail
guest_os_category.id guest_os_category_id,
guest_os_category.uuid guest_os_category_uuid,
guest_os_category.name guest_os_category_name,
-- capacity rows: capacity_type 0 is exposed as memory, capacity_type 1 as cpu
mem_caps.used_capacity memory_used_capacity,
mem_caps.reserved_capacity memory_reserved_capacity,
cpu_caps.used_capacity cpu_used_capacity,
cpu_caps.reserved_capacity cpu_reserved_capacity,
-- async job targeting this host (see the join filter below)
async_job.id job_id,
async_job.uuid job_uuid,
async_job.job_status job_status,
async_job.account_id job_account_id,
-- out-of-band management state
oobm.enabled AS `oobm_enabled`,
oobm.power_state AS `oobm_power_state`
from
`cloud`.`host`
left join
`cloud`.`cluster` ON host.cluster_id = cluster.id
left join
`cloud`.`data_center` ON host.data_center_id = data_center.id
left join
`cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id
left join
`cloud`.`host_details` ON host.id = host_details.host_id
and host_details.name = 'guest.os.category.id'
left join
-- host_details.value is converted to an unsigned number to compare with the category id
`cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED)
left join
`cloud`.`host_tags` ON host_tags.host_id = host.id
left join
`cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id
and mem_caps.capacity_type = 0
left join
`cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id
and cpu_caps.capacity_type = 1
left join
-- only jobs with job_status = 0 are joined (presumably still-pending jobs; confirm against the async_job status values)
`cloud`.`async_job` ON async_job.instance_id = host.id
and async_job.instance_type = 'Host'
and async_job.job_status = 0
left join
`cloud`.`oobm` ON oobm.host_id = host.id;

View File

@ -495,6 +495,83 @@ CREATE TABLE IF NOT EXISTS `cloud`.`oobm` (
CONSTRAINT `fk_oobm__host_id` FOREIGN KEY (`host_id`) REFERENCES `host` (`id`) ON DELETE CASCADE
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
-- Out-of-band management
-- Recreate host_view: host rows flattened together with their cluster, zone,
-- pod, tag, guest OS category, capacity usage, async job and out-of-band
-- management columns. Every join is a LEFT JOIN, so a host still appears when
-- a related row is missing; a host matching several rows of a joined table
-- (for example several host_tags rows) appears once per match.
DROP VIEW IF EXISTS `cloud`.`host_view`;
CREATE VIEW `cloud`.`host_view` AS
select
-- columns taken directly from the host table
host.id,
host.uuid,
host.name,
host.status,
host.disconnected,
host.type,
host.private_ip_address,
host.version,
host.hypervisor_type,
host.hypervisor_version,
host.capabilities,
host.last_ping,
host.created,
host.removed,
host.resource_state,
host.mgmt_server_id,
host.cpu_sockets,
host.cpus,
host.speed,
host.ram,
-- owning cluster
cluster.id cluster_id,
cluster.uuid cluster_uuid,
cluster.name cluster_name,
cluster.cluster_type,
-- owning zone (data_center)
data_center.id data_center_id,
data_center.uuid data_center_uuid,
data_center.name data_center_name,
data_center.networktype data_center_type,
-- owning pod
host_pod_ref.id pod_id,
host_pod_ref.uuid pod_uuid,
host_pod_ref.name pod_name,
host_tags.tag,
-- guest OS category, resolved through the 'guest.os.category.id' host detail
guest_os_category.id guest_os_category_id,
guest_os_category.uuid guest_os_category_uuid,
guest_os_category.name guest_os_category_name,
-- capacity rows: capacity_type 0 is exposed as memory, capacity_type 1 as cpu
mem_caps.used_capacity memory_used_capacity,
mem_caps.reserved_capacity memory_reserved_capacity,
cpu_caps.used_capacity cpu_used_capacity,
cpu_caps.reserved_capacity cpu_reserved_capacity,
-- async job targeting this host (see the join filter below)
async_job.id job_id,
async_job.uuid job_uuid,
async_job.job_status job_status,
async_job.account_id job_account_id,
-- out-of-band management state
oobm.enabled AS `oobm_enabled`,
oobm.power_state AS `oobm_power_state`
from
`cloud`.`host`
left join
`cloud`.`cluster` ON host.cluster_id = cluster.id
left join
`cloud`.`data_center` ON host.data_center_id = data_center.id
left join
`cloud`.`host_pod_ref` ON host.pod_id = host_pod_ref.id
left join
`cloud`.`host_details` ON host.id = host_details.host_id
and host_details.name = 'guest.os.category.id'
left join
-- host_details.value is converted to an unsigned number to compare with the category id
`cloud`.`guest_os_category` ON guest_os_category.id = CONVERT( host_details.value , UNSIGNED)
left join
`cloud`.`host_tags` ON host_tags.host_id = host.id
left join
`cloud`.`op_host_capacity` mem_caps ON host.id = mem_caps.host_id
and mem_caps.capacity_type = 0
left join
`cloud`.`op_host_capacity` cpu_caps ON host.id = cpu_caps.host_id
and cpu_caps.capacity_type = 1
left join
-- only jobs with job_status = 0 are joined (presumably still-pending jobs; confirm against the async_job status values)
`cloud`.`async_job` ON async_job.instance_id = host.id
and async_job.instance_type = 'Host'
and async_job.job_status = 0
left join
`cloud`.`oobm` ON oobm.host_id = host.id;
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '4.0', 'centosGuest', 171, now(), 0);
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '4.1', 'centosGuest', 171, now(), 0);
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.0', 'centosGuest', 171, now(), 0);
@ -545,4 +622,3 @@ INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervis
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.0', 'centos64Guest', 228, now(), 0);
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.1', 'centos64Guest', 228, now(), 0);
INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid,hypervisor_type, hypervisor_version, guest_os_name, guest_os_id, created, is_user_defined) VALUES (UUID(), 'VMware', '5.5', 'centos64Guest', 228, now(), 0);

View File

@ -277,4 +277,4 @@ CREATE TABLE `cloud`.`external_netscaler_controlcenter` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `cloud`.`sslcerts` ADD COLUMN `name` varchar(255) NULL default NULL COMMENT 'Name of the Certificate';
ALTER TABLE `cloud`.`network_offerings` ADD COLUMN `service_package_id` varchar(255) NULL default NULL COMMENT 'Netscaler ControlCenter Service Package';
ALTER TABLE `cloud`.`network_offerings` ADD COLUMN `service_package_id` varchar(255) NULL default NULL COMMENT 'Netscaler ControlCenter Service Package';

View File

@ -84,7 +84,7 @@ class TestHostHA(cloudstackTestCase):
"sleep": 60,
"timeout": 10,
}
def tearDown(self):
try:
@ -95,20 +95,20 @@ class TestHostHA(cloudstackTestCase):
raise Exception("Warning: Exception during cleanup : %s" % e)
return
def createVMs(self, hostId, number, local):
self.template = get_template(
self.apiclient,
self.zone.id,
self.services["ostype"]
)
if self.template == FAILED:
assert False, "get_template() failed to return template with description %s" % self.services["ostype"]
self.logger.debug("Using template %s " % self.template.id)
if local:
self.service_offering = ServiceOffering.create(
self.apiclient,
@ -119,10 +119,9 @@ class TestHostHA(cloudstackTestCase):
self.apiclient,
self.services["service_offering"]
)
self.logger.debug("Using service offering %s " % self.service_offering.id)
vms = []
for i in range(0, number):
self.services["vm"]["zoneid"] = self.zone.id
@ -139,7 +138,7 @@ class TestHostHA(cloudstackTestCase):
self.cleanup.append(vm)
self.logger.debug("VM create = {}".format(vm.id))
return vm
def noOfVMsOnHost(self, hostId):
listVms = VirtualMachine.list(
self.apiclient,
@ -150,12 +149,12 @@ class TestHostHA(cloudstackTestCase):
for vm in listVms:
self.logger.debug('VirtualMachine on Hyp 1 = {}'.format(vm.id))
vmnos = vmnos + 1
return vmnos
def checkHostDown(self, fromHostIp, testHostIp):
try:
ssh = SshClient(fromHostIp, 22, "root", "password")
ssh = SshClient(fromHostIp, 22, "root", "password")
res = ssh.execute("ping -c 1 %s" % testHostIp)
result = str(res)
if result.count("100% packet loss") == 1:
@ -165,10 +164,10 @@ class TestHostHA(cloudstackTestCase):
except Exception as e:
self.logger.debug("Got exception %s" % e)
return False, 1
def checkHostUp(self, fromHostIp, testHostIp):
try:
ssh = SshClient(fromHostIp, 22, "root", "password")
ssh = SshClient(fromHostIp, 22, "root", "password")
res = ssh.execute("ping -c 1 %s" % testHostIp)
result = str(res)
if result.count(" 0% packet loss") == 1:
@ -178,8 +177,8 @@ class TestHostHA(cloudstackTestCase):
except Exception as e:
self.logger.debug("Got exception %s" % e)
return False, 1
def isOnlyNFSStorageAvailable(self):
if self.zone.localstorageenabled:
return False
@ -196,13 +195,13 @@ class TestHostHA(cloudstackTestCase):
for storage_pool in storage_pools:
if storage_pool.type == u'NetworkFilesystem':
return True
return False
def isOnlyLocalStorageAvailable(self):
if not(self.zone.localstorageenabled):
return False
storage_pools = StoragePool.list(
self.apiclient,
zoneid=self.zone.id,
@ -216,13 +215,13 @@ class TestHostHA(cloudstackTestCase):
for storage_pool in storage_pools:
if storage_pool.type == u'NetworkFilesystem':
return False
return True
def isLocalAndNFSStorageAvailable(self):
if not(self.zone.localstorageenabled):
return False
storage_pools = StoragePool.list(
self.apiclient,
zoneid=self.zone.id,
@ -236,10 +235,10 @@ class TestHostHA(cloudstackTestCase):
for storage_pool in storage_pools:
if storage_pool.type == u'NetworkFilesystem':
return True
return False
def checkHostStateInCloudstack(self, state, hostId):
try:
listHost = Host.list(
@ -254,7 +253,7 @@ class TestHostHA(cloudstackTestCase):
True,
"Check if listHost returns a valid response"
)
self.assertEqual(
len(listHost),
1,
@ -268,19 +267,30 @@ class TestHostHA(cloudstackTestCase):
except Exception as e:
self.logger.debug("Got exception %s" % e)
return False, 1
def disconnectHostfromNetwork(self, hostIp, timeout):
srcFile = os.path.dirname(os.path.realpath(__file__)) + "/test_host_ha.sh"
if not(os.path.isfile(srcFile)):
self.logger.debug("File %s not found" % srcFile)
raise unittest.SkipTest("Script file %s required for HA not found" % srcFile);
ssh = SshClient(hostIp, 22, "root", "password")
ssh.scp(srcFile, "/root/test_host_ha.sh")
ssh.execute("nohup sh /root/test_host_ha.sh %s > /dev/null 2>&1 &\n" % timeout)
ssh.execute("nohup sh /root/test_host_ha.sh -t %s -d all > /dev/null 2>&1 &\n" % timeout)
return
def stopAgentOnHost(self, hostIp, timeout):
    """Copy test_host_ha.sh to the host and launch it in 'agent' mode.

    The helper script is looked up next to this test file; when it is
    missing the test is skipped. The script is started in the background
    over SSH with ``-t <timeout> -d agent``.

    :param hostIp: address of the host to connect to over SSH
    :param timeout: value passed to the script's ``-t`` option
    """
    test_dir = os.path.dirname(os.path.realpath(__file__))
    srcFile = test_dir + "/test_host_ha.sh"
    if not os.path.isfile(srcFile):
        self.logger.debug("File %s not found" % srcFile)
        raise unittest.SkipTest("Script file %s required for HA not found" % srcFile)

    remote = SshClient(hostIp, 22, "root", "password")
    remote.scp(srcFile, "/root/test_host_ha.sh")
    launch_cmd = "nohup sh /root/test_host_ha.sh -t %s -d agent > /dev/null 2>&1 &\n" % timeout
    remote.execute(launch_cmd)
    return
@attr(
tags=[
@ -292,11 +302,13 @@ class TestHostHA(cloudstackTestCase):
"sg"],
required_hardware="true")
def test_01_host_ha_with_nfs_storagepool_with_vm(self):
raise unittest.SkipTest("Skipping this test as this is for NFS store only.");
return
if not(self.isOnlyNFSStorageAvailable()):
raise unittest.SkipTest("Skipping this test as this is for NFS store only.");
return
listHost = Host.list(
self.apiclient,
type='Routing',
@ -305,61 +317,61 @@ class TestHostHA(cloudstackTestCase):
)
for host in listHost:
self.logger.debug('Hypervisor = {}'.format(host.id))
if len(listHost) != 2:
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
return
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
if no_of_vms < 5:
self.logger.debug("test_01: Create VMs as there are not enough vms to check host ha")
no_vm_req = 5 - no_of_vms
if (no_vm_req > 0):
self.logger.debug("Creating vms = {}".format(no_vm_req))
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False)
ha_host = listHost[1]
other_host = listHost[0]
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
ha_host = listHost[0]
other_host = listHost[1]
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
if not(hostDown):
if not(hostDown):
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id)
#the test could have failed here but we will try our best to get host back in consistent state
no_of_vms = self.noOfVMsOnHost(ha_host.id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
#
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
if not(hostUp):
if not(hostUp):
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
if not(hostDownInCloudstack):
if not(hostDownInCloudstack):
raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress))
if not(hostUpInCloudstack):
if not(hostUpInCloudstack):
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
return
@attr(
tags=[
"advanced",
@ -370,11 +382,12 @@ class TestHostHA(cloudstackTestCase):
"sg"],
required_hardware="true")
def test_02_host_ha_with_local_storage_and_nfs(self):
raise unittest.SkipTest("Skipping this test as this is for NFS store only.");
return
if not(self.isLocalAndNFSStorageAvailable()):
raise unittest.SkipTest("Skipping this test as this is for Local storage and NFS storage only.");
return
listHost = Host.list(
self.apiclient,
type='Routing',
@ -383,62 +396,62 @@ class TestHostHA(cloudstackTestCase):
)
for host in listHost:
self.logger.debug('Hypervisor = {}'.format(host.id))
if len(listHost) != 2:
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
return
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
if no_of_vms < 5:
self.logger.debug("test_02: Create VMs as there are not enough vms to check host ha")
no_vm_req = 5 - no_of_vms
if (no_vm_req > 0):
self.logger.debug("Creating vms = {}".format(no_vm_req))
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True)
ha_host = listHost[1]
other_host = listHost[0]
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
ha_host = listHost[0]
other_host = listHost[1]
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
if not(hostDown):
if not(hostDown):
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Down", ha_host.id)
#the test could have failed here but we will try our best to get host back in consistent state
no_of_vms = self.noOfVMsOnHost(ha_host.id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
#
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
if not(hostUp):
if not(hostUp):
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
if not(hostDownInCloudstack):
if not(hostDownInCloudstack):
raise self.fail("Host is not down %s, in cloudstack so failing test " % (ha_host.ipaddress))
if not(hostUpInCloudstack):
if not(hostUpInCloudstack):
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
return
@attr(
tags=[
"advanced",
@ -449,11 +462,13 @@ class TestHostHA(cloudstackTestCase):
"sg"],
required_hardware="true")
def test_03_host_ha_with_only_local_storage(self):
raise unittest.SkipTest("Skipping this test as this is for NFS store only.");
return
if not(self.isOnlyLocalStorageAvailable()):
raise unittest.SkipTest("Skipping this test as this is for Local storage only.");
return
listHost = Host.list(
self.apiclient,
type='Routing',
@ -462,55 +477,125 @@ class TestHostHA(cloudstackTestCase):
)
for host in listHost:
self.logger.debug('Hypervisor = {}'.format(host.id))
if len(listHost) != 2:
self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost));
raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost));
return
no_of_vms = self.noOfVMsOnHost(listHost[0].id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
if no_of_vms < 5:
self.logger.debug("test_03: Create VMs as there are not enough vms to check host ha")
no_vm_req = 5 - no_of_vms
if (no_vm_req > 0):
self.logger.debug("Creating vms = {}".format(no_vm_req))
self.vmlist = self.createVMs(listHost[0].id, no_vm_req, True)
ha_host = listHost[1]
other_host = listHost[0]
if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
ha_host = listHost[0]
other_host = listHost[1]
self.disconnectHostfromNetwork(ha_host.ipaddress, 400)
hostDown = wait_until(10, 10, self.checkHostDown, other_host.ipaddress, ha_host.ipaddress)
if not(hostDown):
if not(hostDown):
raise unittest.SkipTest("Host %s is not down, cannot proceed with test" % (ha_host.ipaddress))
hostDownInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Alert", ha_host.id)
#the test could have failed here but we will try our best to get host back in consistent state
no_of_vms = self.noOfVMsOnHost(ha_host.id)
no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)
#
hostUp = wait_until(10, 10, self.checkHostUp, other_host.ipaddress, ha_host.ipaddress)
if not(hostUp):
if not(hostUp):
self.logger.debug("Host is down %s, though HA went fine, the environment is not consistent " % (ha_host.ipaddress))
hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)
if not(hostDownInCloudstack):
if not(hostDownInCloudstack):
raise self.fail("Host is not in alert %s, in cloudstack so failing test " % (ha_host.ipaddress))
if not(hostUpInCloudstack):
if not(hostUpInCloudstack):
raise self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
return
return
@attr(
    tags=[
        "advanced",
        "advancedns",
        "smoke",
        "basic",
        "eip",
        "sg"],
    required_hardware="true")
def test_04_host_ha_vmactivity_check(self):
    """Stop the agent on one of two hosts and check the host state transitions.

    Requires an NFS-only setup with exactly two routing hosts in the pod;
    otherwise the test is skipped. VMs are created until five exist across
    both hosts, the agent is stopped on the host with more VMs, and the
    host is then expected to be reported as "Disconnected" and later "Up".
    """
    if not self.isOnlyNFSStorageAvailable():
        raise unittest.SkipTest("Skipping this test as this is for NFS store only.")

    listHost = Host.list(
        self.apiclient,
        type='Routing',
        zoneid=self.zone.id,
        podid=self.pod.id,
    )
    for host in listHost:
        self.logger.debug('Hypervisor = {}'.format(host.id))

    if len(listHost) != 2:
        self.logger.debug("Host HA can be tested with two host only %s, found" % len(listHost))
        raise unittest.SkipTest("Host HA can be tested with two host only %s, found" % len(listHost))

    no_of_vms = self.noOfVMsOnHost(listHost[0].id)
    no_of_vms = no_of_vms + self.noOfVMsOnHost(listHost[1].id)
    self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)

    if no_of_vms < 5:
        # Log label fixed: it used to say "test_01" (copy-paste from the first test).
        self.logger.debug("test_04: Create VMs as there are not enough vms to check host ha")
        # no_of_vms < 5 guarantees this is positive, so no extra check is needed.
        no_vm_req = 5 - no_of_vms
        self.logger.debug("Creating vms = {}".format(no_vm_req))
        self.vmlist = self.createVMs(listHost[0].id, no_vm_req, False)

    # Stop the agent on whichever host currently runs more VMs.
    ha_host = listHost[1]
    other_host = listHost[0]
    if self.noOfVMsOnHost(listHost[0].id) > self.noOfVMsOnHost(listHost[1].id):
        ha_host = listHost[0]
        other_host = listHost[1]

    self.stopAgentOnHost(ha_host.ipaddress, 150)

    # NOTE(review): the check* callbacks in this file return (bool, value)
    # pairs; if wait_until returns such a pair as well, the truthiness
    # checks below can never fail -- confirm and unpack if so.
    hostDisconnectedInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Disconnected", ha_host.id)
    # the test could have failed here but we will try our best to get host back in consistent state

    no_of_vms = self.noOfVMsOnHost(ha_host.id)
    no_of_vms = no_of_vms + self.noOfVMsOnHost(other_host.id)
    self.logger.debug("Number of VMS on hosts = %s" % no_of_vms)

    hostUpInCloudstack = wait_until(40, 10, self.checkHostStateInCloudstack, "Up", ha_host.id)

    # self.fail() raises on its own; wrapping it in `raise` was redundant.
    if not hostDisconnectedInCloudstack:
        self.fail("Host is not disconnected %s, in cloudstack so failing test " % (ha_host.ipaddress))
    if not hostUpInCloudstack:
        self.fail("Host is not up %s, in cloudstack so failing test " % (ha_host.ipaddress))
    return

View File

@ -1,40 +1,100 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#bring down all eth interfaces
set -x
usage() { echo "Usage: $0 <duration in seconds for downing all network interfaces>"; exit 1; }
usage() { echo "Usage: $0 -d <all|agent> -t <duration in seconds for downing all network interfaces>"; exit 1; }
case $1 in
Interval=
Down=
while getopts 'd:t:' OPTION
do
case $OPTION in
d)
Down="$OPTARG"
;;
t)
Interval="$OPTARG"
;;
*)
usage
;;
esac
done
if [ -z $Interval ]; then
usage
fi
if [ "$Down" != 'all' ]; then
if [ "$Down" != 'agent' ]; then
usage
fi
fi
case $Interval in
''|*[!0-9]*) echo "The parameter should be an integer"; exit ;;
*) echo $1 ;;
esac
if [ -z $1 ]; then
usage
elif [ $1 -lt 1 ]; then
if [ $Interval -lt 1 ]; then
echo "Down time should be at least 1 second"
exit 1
elif [ $1 -gt 5000 ]; then
elif [ $Interval -gt 5000 ]; then
echo "Down time should be less than 5000 second"
exit 1
fi
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth`
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"`
do
ifconfig $i down
ifconfig $i down
done
service cloudstack-agent stop
update-rc.d -f cloudstack-agent remove
sleep $1
sleep 1
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth`
if [ "$Down" = 'agent' ]; then
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep "^eth.$"`
do
ifconfig $i up
done
fi
counter=$Interval
while [ $counter -gt 0 ]
do
ifconfig $i up
sleep 1
counter=$(( $counter - 1 ))
done
if [ "$Down" = 'all' ]; then
for i in `ifconfig -a | sed 's/[ \t].*//;/^\(lo\|\)$/d' | grep eth`
do
ifconfig $i up
done
fi
update-rc.d -f cloudstack-agent defaults
service cloudstack-agent start
service cloudstack-agent start

View File

@ -0,0 +1,247 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from marvin.cloudstackTestCase import *
from marvin.cloudstackAPI import *
from marvin.lib.utils import *
from marvin.lib.common import *
from nose.plugins.attrib import attr
import cmd
from cmd import Cmd
class TestHaForHost(cloudstackTestCase):
    """ Test cases for configuring HA for Host
    """

    def setUp(self):
        """Collect the API client, DB connection, test data and zone for a test."""
        testClient = super(TestHaForHost, self).getClsTestClient()
        self.apiclient = testClient.getApiClient()
        self.dbclient = testClient.getDbConnection()
        self.services = testClient.getParsedTestDataConfig()
        self.zone = get_zone(self.apiclient, testClient.getZoneForTests())
        self.host = None
        self.server = None
        self.cleanup = []

    def tearDown(self):
        """Remove host HA configuration rows and clean up registered resources."""
        try:
            self.dbclient.execute("delete from ha_config where resource_type='Host'")
            cleanup_resources(self.apiclient, self.cleanup)
        except Exception as e:
            raise Exception("Warning: Exception during cleanup : %s" % e)

    def getHost(self, hostId=None):
        """Return a routing host of the test zone, skipping the test if none exists.

        The host found by a previous call is reused when no hostId is given;
        passing a hostId always queries the API again.
        """
        if self.host and hostId is None:
            return self.host

        response = list_hosts(
            self.apiclient,
            zoneid=self.zone.id,
            type='Routing',
            id=hostId
        )
        # Guard against an empty/None listing before taking its length,
        # matching the check used by TestHAKVM.getHost.
        if response and len(response) > 0:
            self.host = response[0]
            return self.host
        # skipTest() raises by itself; no explicit `raise` is needed.
        self.skipTest("No hosts found, skipping HA for Host test")

    def getHaProvider(self, host):
        """Return the first HA provider listed for the host's hypervisor."""
        cmd = listHostHAProviders.listHostHAProvidersCmd()
        cmd.hypervisor = host.hypervisor
        response = self.apiclient.listHostHAProviders(cmd)
        return response[0].haprovider

    def configureHaProvider(self):
        """Configure HA on the test host with the provider for its hypervisor."""
        cmd = configureHAForHost.configureHAForHostCmd()
        cmd.hostid = self.getHost().id
        cmd.provider = self.getHaProvider(self.getHost())
        return self.apiclient.configureHAForHost(cmd)

    def getHaForHostEnableCmd(self):
        """Build an enableHAForHost command for the test host."""
        cmd = enableHAForHost.enableHAForHostCmd()
        cmd.hostid = self.getHost().id
        return cmd

    def getHaForHostDisableCmd(self):
        """Build a disableHAForHost command for the test host."""
        cmd = disableHAForHost.disableHAForHostCmd()
        cmd.hostid = self.getHost().id
        return cmd

    def getListHostHAResources(self):
        """Build a listHostHAResources command for the test host."""
        cmd = listHostHAResources.listHostHAResourcesCmd()
        cmd.hostid = self.getHost().id
        return cmd

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_enable_ha_for_host(self):
        """
            This test enables HA for a host
        """
        self.configureHaProvider()
        cmd = self.getHaForHostEnableCmd()
        response = self.apiclient.enableHAForHost(cmd)

        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, True)

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_enable_ha_for_host_invalid(self):
        """
            This is a negative test for enable HA for a host
        """
        self.configureHaProvider()
        cmd = self.getHaForHostEnableCmd()
        cmd.hostid = -1

        try:
            self.apiclient.enableHAForHost(cmd)
        except Exception:
            pass
        else:
            self.fail("Expected an exception to be thrown, failing")

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_disable_ha_for_host(self):
        """
            This test disables HA for a host
        """
        self.configureHaProvider()
        cmd = self.getHaForHostDisableCmd()

        response = self.apiclient.disableHAForHost(cmd)

        # Was assertTrue(a, b), which treats b as the failure message and
        # only checks that a is truthy; compare the two ids instead.
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, False)

        response = self.getHost(cmd.hostid)

        self.assertEqual(response.hostha.hastate, "Disabled")

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_disable_ha_for_host_invalid(self):
        """
            This is a negative test for disable HA for a host
        """
        self.configureHaProvider()
        cmd = self.getHaForHostDisableCmd()
        cmd.hostid = -1

        try:
            self.apiclient.disableHAForHost(cmd)
        except Exception:
            pass
        else:
            self.fail("Expected an exception to be thrown, failing")

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host(self):
        """
            Test that verifies the listHAForHost API
        """
        self.configureHaProvider()
        db_count = self.dbclient.execute("SELECT count(*) FROM cloud.ha_config")

        cmd = self.getListHostHAResources()
        # Drop the host filter so the API lists every HA resource.
        del cmd.hostid
        response = self.apiclient.listHostHAResources(cmd)

        self.assertEqual(db_count[0][0], len(response))

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host_valid(self):
        """
            Valid test for listing a specific host HA resources
        """
        self.configureHaProvider()
        cmd = self.getListHostHAResources()
        response = self.apiclient.listHostHAResources(cmd)
        self.assertEqual(response[0].hostid, cmd.hostid)

    @attr(tags=["advanced",
                "advancedns",
                "smoke",
                "basic",
                "sg"],
          required_hardware="false")
    def test_list_ha_for_host_invalid(self):
        """
            Test that listHostHAResources is returning exception when called with invalid data
        """
        self.configureHaProvider()
        cmd = self.getListHostHAResources()
        cmd.hostid = "someinvalidvalue"

        try:
            self.apiclient.listHostHAResources(cmd)
        except Exception:
            pass
        else:
            self.fail("Expected an exception to be thrown, failing")

View File

@ -0,0 +1,700 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import marvin
from marvin.cloudstackTestCase import *
from marvin.cloudstackAPI import *
from marvin.lib.utils import *
from marvin.lib.base import *
from marvin.lib.common import *
from nose.plugins.attrib import attr
import random
from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer
import random
import socket
import sys
import thread
import time
class TestHAKVM(cloudstackTestCase):
""" Test cases for host HA using KVM host(s)
"""
def setUp(self):
    """Prepare clients, configuration, a fake management-server id and a free port.

    Existing host HA configuration rows are deleted before the host under
    test is looked up.
    """
    client = self.testClient
    self.apiclient = client.getApiClient()
    self.hypervisor = client.getHypervisorInfo()
    self.dbclient = client.getDbConnection()
    self.services = client.getParsedTestDataConfig()

    # First host of the first cluster / pod / zone in the test configuration.
    first_zone = self.config.__dict__["zones"][0]
    first_pod = first_zone.__dict__["pods"][0]
    first_cluster = first_pod.__dict__["clusters"][0]
    self.hostConfig = first_cluster.__dict__["hosts"][0].__dict__
    self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__

    base_id = random.randint(10000, 99999)
    multiplier = random.randint(10, 20)
    self.fakeMsId = base_id * multiplier

    # Cleanup any existing configs
    self.dbclient.execute("delete from ha_config where resource_type='Host'")
    self.host = self.getHost()

    # use random port for ipmisim: bind to port 0, note the assigned port,
    # then release the socket again
    probe = socket.socket()
    probe.bind(('', 0))
    bound_address = probe.getsockname()
    self.serverPort = bound_address[1]
    probe.close()

    self.cleanup = []
def getFakeMsId(self):
    """Return the fake management-server id stored in ``self.fakeMsId``."""
    fake_id = self.fakeMsId
    return fake_id
def getFakeMsRunId(self):
    """Return the fake run id: ``self.fakeMsId`` multiplied by 1000."""
    run_id = self.fakeMsId * 1000
    return run_id
def tearDown(self):
    """Delete the rows this test class may have created and clean up resources.

    Removes the fake management-server rows, the HA and out-of-band
    management detail flags, host HA configuration and the oobm row bound to
    the ipmisim port, then releases everything registered in ``self.cleanup``.
    Any failure is re-raised as a generic Exception with a cleanup warning.
    """
    try:
        # Fake management server entries (peer rows first, then the server row).
        # These two deletes used to be issued a second time further down;
        # running them once leaves the same final state.
        self.dbclient.execute("delete from mshost_peer where peer_runid=%s" % self.getFakeMsRunId())
        self.dbclient.execute("delete from mshost where runid=%s" % self.getFakeMsRunId())
        # Host HA settings
        self.dbclient.execute("delete from cluster_details where name='resourceHAEnabled'")
        self.dbclient.execute("delete from data_center_details where name='resourceHAEnabled'")
        self.dbclient.execute("delete from ha_config where resource_type='Host'")
        # Out-of-band management settings
        self.dbclient.execute("delete from oobm where port=%d" % self.getIpmiServerPort())
        self.dbclient.execute("delete from cluster_details where name='outOfBandManagementEnabled'")
        self.dbclient.execute("delete from data_center_details where name='outOfBandManagementEnabled'")
        cleanup_resources(self.apiclient, self.cleanup)
    except Exception as e:
        raise Exception("Warning: Exception during cleanup : %s" % e)
def getHostHaEnableCmd(self):
    """Build an enableHAForHost command targeting the host returned by getHost()."""
    enable_cmd = enableHAForHost.enableHAForHostCmd()
    enable_cmd.hostid = self.getHost().id
    return enable_cmd
def check_host_transition_to_available(self, timeout=90, interval=2):
    """Poll the host until its HA state is 'Available'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 90).
        interval: seconds to sleep between polls, so the API is not hammered
            in a tight loop.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        if self.getHost().hostha.hastate == "Available":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Available' within %s seconds" % timeout)
def getHost(self, hostId=None):
    """Return the first routing host (optionally looked up by id) and cache it on self.host.

    The source held two definitions of this method; the later one, taking
    ``hostId`` and not filtering on resource state, shadowed the earlier one.
    Only that effective definition is kept.

    Args:
        hostId: optional host id passed to list_hosts as ``id``.

    Skips the test when no host is returned.
    """
    response = list_hosts(
        self.apiclient,
        type='Routing',
        id=hostId
    )
    if response and len(response) > 0:
        self.host = response[0]
        return self.host
    # skipTest() raises by itself, so nothing after it runs
    self.skipTest("No KVM hosts found, skipping host-ha test")
def getHostHaConfigCmd(self, provider='kvmhaprovider'):
    """Build a configureHAForHost command for the current host with the given provider."""
    config_cmd = configureHAForHost.configureHAForHostCmd()
    config_cmd.provider = provider
    config_cmd.hostid = self.getHost().id
    return config_cmd
def getHostHaEnableCmd(self):
    """Build an enableHAForHost command whose hostid is the id of getHost()."""
    ha_enable = enableHAForHost.enableHAForHostCmd()
    ha_enable.hostid = self.getHost().id
    return ha_enable
def getHostHaDisableCmd(self):
    """Build a disableHAForHost command whose hostid is the id of getHost()."""
    ha_disable = disableHAForHost.disableHAForHostCmd()
    ha_disable.hostid = self.getHost().id
    return ha_disable
def configureAndEnableHostHa(self, initialize=True):
    """Configure the HA provider for the host, enable HA and assert it is enabled.

    When ``initialize`` is truthy the provider state is additionally set via
    configureKVMHAProviderState(True, True, True, False).
    """
    config_cmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(config_cmd)
    enable_cmd = self.getHostHaEnableCmd()
    enabled = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(enabled.haenable, True)
    if not initialize:
        return
    self.configureKVMHAProviderState(True, True, True, False)
def configureAndDisableHostHa(self, hostId):
    """Configure the HA provider, then disable HA for ``hostId`` and assert the result."""
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    disable_cmd = self.getHostHaDisableCmd()
    disable_cmd.hostid = hostId
    result = self.apiclient.disableHAForHost(disable_cmd)
    self.assertEqual(result.hostid, disable_cmd.hostid)
    self.assertEqual(result.haenable, False)
def enableHostHa(self, hostId):
    """Enable HA for ``hostId``, assert the API reports it enabled and return the response."""
    enable_cmd = self.getHostHaEnableCmd()
    enable_cmd.hostid = hostId
    result = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(result.hostid, enable_cmd.hostid)
    self.assertEqual(result.haenable, True)
    return result
def configureKVMHAProviderState(self, health, activity, recover, fence):
    """Send the given health/activity/recover/fence flags for the current host.

    NOTE(review): this calls ``configureKVMHAProviderState`` on the API client
    with a configureHAForHost command object -- confirm that such an API call
    exists for the KVM provider.
    """
    state_cmd = configureHAForHost.configureHAForHostCmd()
    state_cmd.hostid = self.getHost().id
    state_cmd.health = health
    state_cmd.activity = activity
    state_cmd.recover = recover
    state_cmd.fence = fence
    result = self.apiclient.configureKVMHAProviderState(state_cmd)
    self.assertEqual(result.success, 'true')
def checkSyncToState(self, state, interval=5000):
    """Wait (via wait_until) for the host's HA state to equal ``state``, then assert it.

    Args:
        state: expected value of ``hostha.hastate``.
        interval: value in milliseconds from which the polling period passed
            to wait_until is derived (1 + interval / 1000).
    """
    def ha_state_matches(wanted):
        current = self.getHost(hostId=self.getHost().id).hostha
        return current.hastate == wanted, None

    poll_period = 1 + int(interval) / 1000
    synced, _ = wait_until(poll_period, 10, ha_state_matches, state)
    if not synced:
        self.fail("Failed to get host.hastate synced to expected state:" + state)
    final = self.getHost(hostId=self.getHost().id).hostha
    self.assertEqual(final.hastate, state)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_configure_invalid_provider(self):
    """
    Configuring host-ha with a provider name that does not exist must raise
    """
    bad_cmd = self.getHostHaConfigCmd()
    bad_cmd.provider = 'randomDriverThatDoesNotExist'
    rejected = False
    try:
        self.apiclient.configureHAForHost(bad_cmd)
    except Exception:
        rejected = True
    if not rejected:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_configure_default_driver(self):
    """
    Configuring host-ha with the default provider echoes the host id and provider
    """
    config_cmd = self.getHostHaConfigCmd()
    result = self.apiclient.configureHAForHost(config_cmd)
    self.assertEqual(result.hostid, config_cmd.hostid)
    expected_provider = config_cmd.provider.lower()
    self.assertEqual(result.haprovider, expected_provider)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_ha_enable_feature_invalid(self):
    """
    Enabling HA must raise for host id -1 and for cluster/zone commands that
    carry no parameters
    """
    def expect_rejection(api_call):
        # Runs the call and fails the test unless it raised.
        rejected = False
        try:
            api_call()
        except Exception:
            rejected = True
        if not rejected:
            self.fail("Expected an exception to be thrown, failing")

    host_cmd = self.getHostHaEnableCmd()
    host_cmd.hostid = -1
    expect_rejection(lambda: self.apiclient.enableHAForHost(host_cmd))
    expect_rejection(lambda: self.apiclient.enableHAForCluster(enableHAForCluster.enableHAForClusterCmd()))
    expect_rejection(lambda: self.apiclient.enableHAForZone(enableHAForZone.enableHAForZoneCmd()))
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_ha_disable_feature_invalid(self):
    """
    Disabling HA must raise for host id -1 and for cluster/zone commands that
    carry no parameters
    """
    def expect_rejection(api_call):
        # Runs the call and fails the test unless it raised.
        rejected = False
        try:
            api_call()
        except Exception:
            rejected = True
        if not rejected:
            self.fail("Expected an exception to be thrown, failing")

    host_cmd = self.getHostHaDisableCmd()
    host_cmd.hostid = -1
    expect_rejection(lambda: self.apiclient.disableHAForHost(host_cmd))
    expect_rejection(lambda: self.apiclient.disableHAForCluster(disableHAForCluster.disableHAForClusterCmd()))
    expect_rejection(lambda: self.apiclient.disableHAForZone(disableHAForZone.disableHAForZoneCmd()))
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_enable_feature_valid(self):
    """
    Enabling host-ha after configuring the provider reports haenable True
    """
    config_cmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(config_cmd)
    enable_cmd = self.getHostHaEnableCmd()
    result = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(result.hostid, enable_cmd.hostid)
    self.assertEqual(result.haenable, True)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_disable_feature_valid(self):
    """
    Disabling host-ha reports haenable False and the host's HA state becomes Disabled
    """
    config_cmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(config_cmd)
    disable_cmd = self.getHostHaDisableCmd()
    result = self.apiclient.disableHAForHost(disable_cmd)
    self.assertEqual(result.hostid, disable_cmd.hostid)
    self.assertEqual(result.haenable, False)
    host_ha = self.getHost(hostId=disable_cmd.hostid).hostha
    self.assertEqual(host_ha.hastate, 'Disabled')
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_configure_ha_provider_invalid(self):
    """
    Tests configure HA Provider with a provider that does not match the host's hypervisor
    """
    # Enable ha for host
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    cmd = self.getHostHaEnableCmd()
    response = self.apiclient.enableHAForHost(cmd)
    self.assertEqual(response.hostid, cmd.hostid)
    self.assertEqual(response.haenable, True)

    host = self.getHost(response.hostid)

    # Pick the provider belonging to the *other* hypervisor. The lookup is an
    # exact match: the previous `x in "simulator"` was a substring test that
    # also accepted '' or partial names.
    mismatched_provider = {
        "simulator": "kvmhaprovider",
        "kvm": "simulatorhaprovider",
    }
    hypervisor = host.hypervisor.lower()
    if hypervisor not in mismatched_provider:
        # Without a provider the API call would fail for an unrelated reason
        # and the test would pass without checking anything.
        self.skipTest("No mismatching HA provider known for hypervisor: %s" % host.hypervisor)

    # Setup wrong configuration for the host
    conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
    conf_ha_cmd.provider = mismatched_provider[hypervisor]
    conf_ha_cmd.hostid = cmd.hostid

    # Call the configure HA provider API with not supported provider for HA
    try:
        self.apiclient.configureHAForHost(conf_ha_cmd)
    except Exception:
        pass
    else:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_configure_ha_provider_valid(self):
    """
    Tests configure HA Provider with the provider matching the host's hypervisor
    """
    # Enable ha for host
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    cmd = self.getHostHaEnableCmd()
    response = self.apiclient.enableHAForHost(cmd)
    self.assertEqual(response.hostid, cmd.hostid)
    self.assertEqual(response.haenable, True)

    host = self.getHost(response.hostid)

    # Exact-match lookup of the provider for this hypervisor; the previous
    # `x in "kvm"` was a substring test that also accepted '' or partial names.
    matching_provider = {
        "kvm": "kvmhaprovider",
        "simulator": "simulatorhaprovider",
    }
    hypervisor = host.hypervisor.lower()
    if hypervisor not in matching_provider:
        self.skipTest("No HA provider known for hypervisor: %s" % host.hypervisor)

    # Setup configuration for the host
    conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
    conf_ha_cmd.provider = matching_provider[hypervisor]
    conf_ha_cmd.hostid = cmd.hostid

    # Call the configure HA provider API with the supported provider
    response = self.apiclient.configureHAForHost(conf_ha_cmd)

    # Check the response contains the set provider and hostID
    self.assertEqual(response.haprovider, conf_ha_cmd.provider)
    self.assertEqual(response.hostid, conf_ha_cmd.hostid)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_disable_oobm_ha_state_ineligible(self):
    """
    With HA enabled on a host, disabling out-of-band management must leave the
    HA state Ineligible
    """
    # Turn HA on for the host
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    ha_cmd = self.getHostHaEnableCmd()
    ha_result = self.apiclient.enableHAForHost(ha_cmd)
    self.assertEqual(ha_result.hostid, ha_cmd.hostid)
    self.assertEqual(ha_result.haenable, True)
    host_id = ha_cmd.hostid

    # Configure, then disable, out-of-band management for the same host
    self.apiclient.configureOutOfBandManagement(self.getOobmConfigCmd())
    oobm_disable = self.getOobmDisableCmd()
    oobm_disable.hostid = host_id
    oobm_result = self.apiclient.disableOutOfBandManagementForHost(oobm_disable)
    self.assertEqual(oobm_result.hostid, oobm_disable.hostid)
    self.assertEqual(oobm_result.enabled, False)
    oobm_state = self.getHost(hostId=host_id).outofbandmanagement
    self.assertEqual(oobm_state.powerstate, 'Disabled')

    # The HA state must now be Ineligible
    ha_state = self.getHost(hostId=host_id).hostha
    self.assertEqual(ha_state.hastate, "Ineligible")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_configure_default_driver(self):
    """
    Configuring host-ha with the default provider returns the same host id and
    the lower-cased provider name
    """
    default_cmd = self.getHostHaConfigCmd()
    configured = self.apiclient.configureHAForHost(default_cmd)
    self.assertEqual(configured.hostid, default_cmd.hostid)
    provider_lower = default_cmd.provider.lower()
    self.assertEqual(configured.haprovider, provider_lower)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_enable_ha_when_host_powerstate_on(self):
    """
    Tests that when HA is enabled for a host, if oobm state is on HA State should turn into Available
    """
    self.configureAndStartIpmiServer()
    # From here on the ipmisim server is running; stop it even when an
    # assertion below fails, so it does not outlive the test.
    try:
        self.assertIssueCommandState('ON', 'On')

        self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
        cmd = self.getHostHaEnableCmd()
        response = self.apiclient.enableHAForHost(cmd)
        self.assertEqual(response.hostid, cmd.hostid)
        self.assertEqual(response.haenable, True)

        # Verify HA State is Available
        self.check_host_transition_to_available()
        response = self.getHost()
        # Compare by value: `is not` tests object identity, which is not
        # reliable for strings returned by the API.
        if response.hostha.hastate != "Available":
            print(response)
        self.assertEqual(response.hostha.hastate, "Available")
    finally:
        self.stopIpmiServer()
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_enable_feature_without_setting_provider(self):
    """
    Enabling HA on a host that has no HA provider configured must raise
    """
    unconfigured = self.get_non_configured_ha_host()
    enable_cmd = self.getHostHaEnableCmd()
    enable_cmd.hostid = unconfigured.id
    rejected = False
    try:
        self.apiclient.enableHAForHost(enable_cmd)
    except Exception:
        rejected = True
    if not rejected:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_enable_ha_when_host_disabled(self):
    """
    Tests Enable HA when host is disabled, should be Ineligible
    """
    # Enable HA
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    cmd = self.getHostHaEnableCmd()
    cmd.hostid = self.host.id
    host_id = cmd.hostid

    enable = self.apiclient.enableHAForHost(cmd)
    self.assertEqual(enable.hostid, cmd.hostid)
    self.assertEqual(enable.haenable, True)

    # Disable Host
    self.disableHost(host_id)

    # Check HA State
    try:
        response = self.getHost(host_id)
        self.assertEqual(response.hostha.hastate, "Ineligible")
    finally:
        # Always re-enable the host, whether or not the check passed
        self.enableHost(host_id)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_enable_ha_when_host_inMaintenance(self):
    """
    With HA enabled, a host prepared for maintenance must report HA state Ineligible
    """
    target = self.getHost()

    # Turn HA on
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    enable_cmd = self.getHostHaEnableCmd()
    enable_cmd.hostid = target.id
    enabled = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(enabled.hostid, enable_cmd.hostid)
    self.assertEqual(enabled.haenable, True)

    # Put the host into prepare-for-maintenance
    self.setHostToMaintanance(target.id)

    # Check the HA state, remembering any failure until maintenance is cancelled
    failure = None
    try:
        current = self.getHost(target.id)
        self.assertEqual(current.hostha.hastate, "Ineligible")
    except Exception as e:
        failure = e

    # Cancel maintenance in both the passing and the failing case
    self.cancelMaintenance(target.id)
    if failure is not None:
        self.fail(failure)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_hostha_enable_ha_when_host_disconected(self):
    """
    With HA enabled, a host whose agent was killed must report HA state Ineligible
    """
    target = self.getHost()

    # Turn HA on
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    enable_cmd = self.getHostHaEnableCmd()
    enable_cmd.hostid = target.id
    enabled = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(enabled.hostid, enable_cmd.hostid)
    self.assertEqual(enabled.haenable, True)

    # Kill the agent on the host
    self.killAgent()

    # Check the HA state, remembering any failure until the agent is restarted
    failure = None
    try:
        time.sleep(1)
        current = self.getHost(self.host.id)
        self.assertEqual(current.hostha.hastate, "Ineligible")
    except Exception as e:
        failure = e

    # Start the agent again in both the passing and the failing case
    self.startAgent()
    if failure is not None:
        self.fail(failure)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_remove_ha_provider_not_possible(self):
    """
    With HA enabled, configuring an empty provider name must raise
    """
    target = self.getHost()

    # Turn HA on
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    enable_cmd = self.getHostHaEnableCmd()
    enable_cmd.hostid = target.id
    enabled = self.apiclient.enableHAForHost(enable_cmd)
    self.assertEqual(enabled.hostid, enable_cmd.hostid)
    self.assertEqual(enabled.haenable, True)

    # Try to blank out the provider
    rejected = False
    try:
        self.apiclient.configureHAForHost(self.getHostHaConfigCmd(''))
    except Exception:
        rejected = True
    if not rejected:
        self.fail("Expected an exception to be thrown, failing")
def configureAndStartIpmiServer(self, power_state=None):
    """
    Enable out-of-band management for the host and start ipmisim; when
    power_state is truthy, store it on the ipmisim context's bmc
    """
    self.configureAndEnableOobm()
    self.startIpmiServer()
    if not power_state:
        return
    IpmiServerContext().bmc.powerstate = power_state
def assertIssueCommandState(self, command, expected):
    """
    Issue a power command (unless it is 'STATUS'), then query STATUS and assert
    the reported power state equals ``expected``
    """
    needs_action = command != 'STATUS'
    if needs_action:
        self.issuePowerActionCmd(command)
    status = self.issuePowerActionCmd('STATUS')
    self.assertEqual(status.powerstate, expected)
def configureAndEnableOobm(self):
    """Configure out-of-band management for the host, enable it and assert it is enabled."""
    oobm_config = self.getOobmConfigCmd()
    self.apiclient.configureOutOfBandManagement(oobm_config)
    oobm_enable = self.getOobmEnableCmd()
    result = self.apiclient.enableOutOfBandManagementForHost(oobm_enable)
    self.assertEqual(result.enabled, True)
def startIpmiServer(self):
    """Start a ThreadedIpmiServer on the chosen port in a new thread and keep it on self.server."""
    def serve(tname, server):
        # Thread body: serve until shut down; errors raised by the serve loop
        # are deliberately ignored.
        self.debug("Starting ipmisim server")
        try:
            server.serve_forever()
        except Exception:
            pass

    IpmiServerContext('reset')
    ThreadedIpmiServer.allow_reuse_address = False
    bind_address = ('0.0.0.0', self.getIpmiServerPort())
    ipmi_server = ThreadedIpmiServer(bind_address, IpmiServer)
    thread.start_new_thread(serve, ("ipmi-server", ipmi_server,))
    self.server = ipmi_server
def stopIpmiServer(self):
    """Shut down and close the ipmisim server if one is running.

    Does nothing when no server was started. ``self.server`` is reset to None
    afterwards so the server is not shut down a second time.
    """
    # getattr(): the attribute only exists once a server has been started
    server = getattr(self, "server", None)
    if server:
        server.shutdown()
        server.server_close()
        self.server = None
def getOobmIssueActionCmd(self):
    """Build an out-of-band power-action command for the current host, defaulting to 'STATUS'."""
    action_cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd()
    action_cmd.hostid = self.getHost().id
    action_cmd.action = 'STATUS'
    return action_cmd
def issuePowerActionCmd(self, action, timeout=None):
    """Issue an out-of-band power action for the host and return the API response.

    Args:
        action: power action to send (e.g. 'ON', 'STATUS').
        timeout: optional value set as the command's timeout when truthy.

    Skips the test when the error text matches a known ipmitool session
    problem; any other error is re-raised unchanged.
    """
    cmd = self.getOobmIssueActionCmd()
    cmd.action = action
    if timeout:
        cmd.timeout = timeout

    try:
        return self.apiclient.issueOutOfBandManagementPowerAction(cmd)
    except Exception as e:
        if "packet session id 0x0 does not match active session" in str(e):
            self.skipTest("Known ipmitool issue hit, skipping test")
        # Bare raise keeps the original traceback (`raise e` would replace it
        # under Python 2).
        raise
def getOobmEnableCmd(self):
    """Build an enableOutOfBandManagementForHost command for the current host."""
    oobm_enable = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd()
    oobm_enable.hostid = self.getHost().id
    return oobm_enable
def getOobmDisableCmd(self):
    """Build a disableOutOfBandManagementForHost command for the current host."""
    oobm_disable = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd()
    oobm_disable.hostid = self.getHost().id
    return oobm_disable
def getIpmiServerPort(self):
    """Return the port number stored on this instance for the ipmisim server."""
    port = self.serverPort
    return port
def getOobmConfigCmd(self):
    """Build the out-of-band management configuration command pointing at the local ipmisim server."""
    oobm_config = configureOutOfBandManagement.configureOutOfBandManagementCmd()
    oobm_config.driver = 'ipmitool'  # The default available driver
    # Endpoint of the ipmisim server used by the tests
    oobm_config.address = self.getIpmiServerIp()
    oobm_config.port = self.getIpmiServerPort()
    # Credentials sent with the configuration
    oobm_config.username = 'admin'
    oobm_config.password = 'password'
    oobm_config.hostid = self.getHost().id
    return oobm_config
def getIpmiServerIp(self):
    """Return the local IP address used to reach the management server.

    A UDP socket is connected to the management server's address and its
    local socket name is read back. The socket is always closed afterwards.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"]))
        return s.getsockname()[0]
    finally:
        s.close()
def get_non_configured_ha_host(self):
    """Return the first routing host whose ``haprovider`` is None.

    Every returned host is examined; the previous loop skipped the test as
    soon as the first host turned out to be configured. The test is skipped
    when no host is returned or all hosts have a provider.
    """
    hosts = list_hosts(
        self.apiclient,
        type='Routing'
    )
    if not hosts:
        self.skipTest("No KVM hosts found, skipping host-ha test")

    for host in hosts:
        if host.haprovider is None:
            return host

    self.skipTest("There is no non configured hosts. Skipping test.")
def getHAState(self, id):
    """Return the first entry of the listHostHAResources response for host ``id``."""
    list_cmd = listHostHAResources.listHostHAResourcesCmd()
    list_cmd.hostid = id
    resources = self.apiclient.listHostHAResources(list_cmd)
    first = resources[0]
    return first
def startAgent(self):
    """SSH to the host with the configured credentials and start the cloudstack-agent service."""
    target = self.getHost()
    ssh = SshClient(
        host=target.ipaddress,
        port=22,
        user=self.hostConfig["username"],
        passwd=self.hostConfig["password"]
    )
    ssh.execute("service cloudstack-agent start")
def disableHost(self, id):
    """Set the host's allocation state to "Disable" and assert it reports "Disabled"."""
    update_cmd = updateHost.updateHostCmd()
    update_cmd.id = id
    update_cmd.allocationstate = "Disable"
    updated = self.apiclient.updateHost(update_cmd)
    self.assertEqual(updated.resourcestate, "Disabled")
def enableHost(self, id):
    """Set the host's allocation state to "Enable" and assert it reports "Enabled"."""
    update_cmd = updateHost.updateHostCmd()
    update_cmd.id = id
    update_cmd.allocationstate = "Enable"
    updated = self.apiclient.updateHost(update_cmd)
    self.assertEqual(updated.resourcestate, "Enabled")
def setHostToMaintanance(self, id):
    """Request maintenance for the host and assert it reports "PrepareForMaintenance"."""
    maintenance_cmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd()
    maintenance_cmd.id = id
    result = self.apiclient.prepareHostForMaintenance(maintenance_cmd)
    self.assertEqual(result.resourcestate, "PrepareForMaintenance")
def cancelMaintenance(self, id):
    """Cancel maintenance for the host and assert it reports "Enabled"."""
    cancel_cmd = cancelHostMaintenance.cancelHostMaintenanceCmd()
    cancel_cmd.id = id
    result = self.apiclient.cancelHostMaintenance(cancel_cmd)
    self.assertEqual(result.resourcestate, "Enabled")
def killAgent(self):
    """SSH to the host and kill every process whose `ps aux` line matches 'cloudstack-agent'."""
    target = self.getHost()
    ssh = SshClient(
        host=target.ipaddress,
        port=22,
        user=self.hostConfig["username"],
        passwd=self.hostConfig["password"]
    )
    ssh.execute("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")

View File

@ -0,0 +1,535 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from marvin.cloudstackTestCase import *
from marvin.lib.utils import *
from marvin.lib.base import *
from marvin.lib.common import *
from nose.plugins.attrib import attr
from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer
import random
import socket
import thread
class TestHaKVMAgent(cloudstackTestCase):
    """ Test cases for host HA state transitions when the KVM agent is stopped or killed
    """
def setUp(self):
    """Prepare clients, host details, cluster-level HA settings and a service offering/template."""
    client = super(TestHaKVMAgent, self).getClsTestClient()
    self.apiClient = client.getApiClient()
    self.dbclient = client.getDbConnection()
    self.services = client.getParsedTestDataConfig()

    self.zone = get_zone(self.apiClient, client.getZoneForTests())
    self.host = self.getHost()
    self.cluster_id = self.host.clusterid
    self.server = None

    self.hypervisor = self.testClient.getHypervisorInfo()
    self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__
    self.hostConfig = self.config.__dict__["zones"][0].__dict__["pods"][0].__dict__["clusters"][0].__dict__["hosts"][0].__dict__
    self.fakeMsId = random.randint(10000, 99999) * random.randint(10, 20)

    # Cleanup any existing configs
    self.dbclient.execute("delete from ha_config where resource_type='Host'")

    # use random port for ipmisim
    probe = socket.socket()
    probe.bind(('', 0))
    self.serverPort = probe.getsockname()[1]
    probe.close()

    # Set Cluster-level setting in order to run tests faster
    cluster_settings = [
        ("kvm.ha.activity.check.failure.ratio", "0.7"),
        ("kvm.ha.activity.check.interval", "10"),
        ("kvm.ha.activity.check.max.attempts", "5"),
        ("kvm.ha.activity.check.timeout", "60"),
        ("kvm.ha.degraded.max.period", "30"),
        ("kvm.ha.fence.timeout", "60"),
        ("kvm.ha.health.check.timeout", "10"),
        ("kvm.ha.recover.failure.threshold", "1"),
        ("kvm.ha.recover.timeout", "120"),
        ("kvm.ha.recover.wait.period", "60"),
    ]
    for setting_name, setting_value in cluster_settings:
        self.update_configuration(setting_name, setting_value)

    self.service_offering = ServiceOffering.create(
        self.apiClient,
        self.services["service_offerings"]
    )
    self.template = get_template(
        self.apiClient,
        self.zone.id,
        self.services["ostype"]
    )
    self.cleanup = [self.service_offering]
def tearDown(self):
    """Delete DB rows and resources created by a test and stop the ipmisim server.

    Raises:
        Exception: wrapping whatever error was hit while cleaning up.
    """
    try:
        try:
            fake_run_id = self.getFakeMsRunId()
            # Each statement is issued once; the original list repeated the
            # two mshost deletes.
            statements = [
                "delete from mshost_peer where peer_runid=%s" % fake_run_id,
                "delete from mshost where runid=%s" % fake_run_id,
                "delete from cluster_details where name='resourceHAEnabled'",
                "delete from data_center_details where name='resourceHAEnabled'",
                "delete from ha_config where resource_type='Host'",
                "delete from oobm where port=%d" % self.getIpmiServerPort(),
                "delete from cluster_details where name='outOfBandManagementEnabled'",
                "delete from data_center_details where name='outOfBandManagementEnabled'",
            ]
            for statement in statements:
                self.dbclient.execute(statement)
            cleanup_resources(self.apiClient, self.cleanup)
        finally:
            # Stop the ipmisim server even when a cleanup step above failed,
            # otherwise it keeps running after the test.
            if self.server:
                self.server.shutdown()
                self.server.server_close()
                self.server = None
    except Exception as e:
        raise Exception("Warning: Exception during cleanup : %s" % e)
def getFakeMsId(self):
    """Return the random fake management-server id chosen for this test instance."""
    fake_id = self.fakeMsId
    return fake_id
def getFakeMsRunId(self):
    """Return the fake management-server run id, i.e. the fake id multiplied by 1000."""
    run_id = self.fakeMsId * 1000
    return run_id
def getHostHaConfigCmd(self, provider='kvmhaprovider'):
    """Build a configureHAForHost command for self.host with the given provider."""
    config_cmd = configureHAForHost.configureHAForHostCmd()
    config_cmd.provider = provider
    config_cmd.hostid = self.host.id
    return config_cmd
def getHostHaEnableCmd(self):
    """Build an enableHAForHost command for self.host."""
    enable_cmd = enableHAForHost.enableHAForHostCmd()
    enable_cmd.hostid = self.host.id
    return enable_cmd
def getHost(self, hostId=None):
    """Return the first routing host of the test zone (optionally by id) and cache it on self.host.

    Args:
        hostId: optional host id passed to list_hosts as ``id``.

    Skips the test when list_hosts returns nothing.
    """
    response = list_hosts(
        self.apiClient,
        zoneid=self.zone.id,
        type='Routing',
        id=hostId
    )
    # Truthiness check first: len() would raise on a None response
    if response and len(response) > 0:
        self.host = response[0]
        return self.host
    # skipTest() raises by itself, so nothing after it runs
    self.skipTest("No hosts found, skipping out-of-band management test")
def getIpmiServerIp(self):
    """Return the local IP address used to reach the management server.

    A UDP socket is connected to the management server's address and its
    local socket name is read back. The socket is always closed afterwards.
    """
    s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        s.connect((self.mgtSvrDetails["mgtSvrIp"], self.mgtSvrDetails["port"]))
        return s.getsockname()[0]
    finally:
        s.close()
def getIpmiServerPort(self):
    """Return the port number stored on this instance for the ipmisim server."""
    port = self.serverPort
    return port
def getOobmConfigCmd(self):
    """Build the out-of-band management configuration command for self.host."""
    oobm_config = configureOutOfBandManagement.configureOutOfBandManagementCmd()
    oobm_config.driver = 'ipmitool'  # The default available driver
    # Endpoint of the ipmisim server used by the tests
    oobm_config.address = self.getIpmiServerIp()
    oobm_config.port = self.getIpmiServerPort()
    # Credentials sent with the configuration
    oobm_config.username = 'admin'
    oobm_config.password = 'password'
    oobm_config.hostid = self.host.id
    return oobm_config
def getOobmEnableCmd(self):
    """Build an enableOutOfBandManagementForHost command for self.host."""
    oobm_enable = enableOutOfBandManagementForHost.enableOutOfBandManagementForHostCmd()
    oobm_enable.hostid = self.host.id
    return oobm_enable
def getOobmDisableCmd(self):
    """Build a disableOutOfBandManagementForHost command for self.host."""
    oobm_disable = disableOutOfBandManagementForHost.disableOutOfBandManagementForHostCmd()
    oobm_disable.hostid = self.host.id
    return oobm_disable
def getOobmIssueActionCmd(self):
    """Build an out-of-band power-action command for self.host, defaulting to 'STATUS'."""
    action_cmd = issueOutOfBandManagementPowerAction.issueOutOfBandManagementPowerActionCmd()
    action_cmd.hostid = self.host.id
    action_cmd.action = 'STATUS'
    return action_cmd
def issue_power_action_cmd(self, action, timeout=None):
    """Issue an out-of-band power action for the host and return the API response.

    Args:
        action: power action to send (e.g. 'ON', 'STATUS').
        timeout: optional value set as the command's timeout when truthy.

    Skips the test when the error text matches a known ipmitool session
    problem; any other error is re-raised unchanged.
    """
    cmd = self.getOobmIssueActionCmd()
    cmd.action = action
    if timeout:
        cmd.timeout = timeout

    try:
        return self.apiClient.issueOutOfBandManagementPowerAction(cmd)
    except Exception as e:
        if "packet session id 0x0 does not match active session" in str(e):
            self.skipTest("Known ipmitool issue hit, skipping test")
        # Bare raise keeps the original traceback (`raise e` would replace it
        # under Python 2).
        raise
def configure_and_enable_oobm(self):
    """Configure out-of-band management for the host, enable it and assert it is enabled."""
    oobm_config = self.getOobmConfigCmd()
    self.apiClient.configureOutOfBandManagement(oobm_config)
    oobm_enable = self.getOobmEnableCmd()
    result = self.apiClient.enableOutOfBandManagementForHost(oobm_enable)
    self.assertEqual(result.enabled, True)
def start_ipmi_server(self):
    """Start a ThreadedIpmiServer on the chosen port in a new thread and keep it on self.server."""
    def serve(tname, server):
        # Thread body: serve until shut down; errors raised by the serve loop
        # are deliberately ignored.
        self.debug("Starting ipmisim server")
        try:
            server.serve_forever()
        except Exception:
            pass

    IpmiServerContext('reset')
    ThreadedIpmiServer.allow_reuse_address = False
    bind_address = ('0.0.0.0', self.getIpmiServerPort())
    ipmi_server = ThreadedIpmiServer(bind_address, IpmiServer)
    thread.start_new_thread(serve, ("ipmi-server", ipmi_server,))
    self.server = ipmi_server
def checkSyncToState(self, state, interval):
    """Wait (via wait_until) for the host's out-of-band power state to equal ``state``, then assert it.

    Args:
        state: expected value of ``outofbandmanagement.powerstate``.
        interval: value in milliseconds from which the polling period passed
            to wait_until is derived (1 + interval / 1000).
    """
    def power_state_matches(wanted):
        current = self.getHost(hostId=self.host.id).outofbandmanagement
        return current.powerstate == wanted, None

    poll_period = 1 + int(interval)/1000
    synced, _ = wait_until(poll_period, 10, power_state_matches, state)
    if not synced:
        self.fail("Failed to get host.powerstate synced to expected state:" + state)
    final = self.getHost(hostId=self.host.id).outofbandmanagement
    self.assertEqual(final.powerstate, state)
def get_host_in_available_state(self):
    """Bring the host to HA state 'Available' and assert that it got there.

    Steps: start ipmisim with out-of-band management enabled, power the host
    'ON', configure and enable host HA, then wait for the 'Available' state.
    """
    self.configure_and_start_ipmi_server()
    self.assert_issue_command_state('ON', 'On')
    self.configureAndEnableHostHa()

    self.check_host_transition_to_available()

    response = self.getHost()
    # Compare by value: `is not` tests object identity, which is not reliable
    # for strings returned by the API.
    if response.hostha.hastate != "Available":
        print(response)

    self.assertEqual(response.hostha.hastate, "Available")
def configureAndEnableHostHa(self):
    """Configure the HA provider for the host, enable HA and assert it is enabled."""
    config_cmd = self.getHostHaConfigCmd()
    self.apiClient.configureHAForHost(config_cmd)
    enable_cmd = self.getHostHaEnableCmd()
    enabled = self.apiClient.enableHAForHost(enable_cmd)
    self.assertEqual(enabled.haenable, True)
def configure_and_start_ipmi_server(self, power_state=None):
    """
    Enable out-of-band management for the host and start ipmisim; when
    power_state is truthy, store it on the ipmisim context's bmc
    """
    self.configure_and_enable_oobm()
    self.start_ipmi_server()
    if not power_state:
        return
    IpmiServerContext().bmc.powerstate = power_state
def assert_issue_command_state(self, command, expected):
    """
    Issue a power command (unless it is 'STATUS'), then query STATUS and assert
    the reported power state equals ``expected``
    """
    needs_action = command != 'STATUS'
    if needs_action:
        self.issue_power_action_cmd(command)
    status = self.issue_power_action_cmd('STATUS')
    self.assertEqual(status.powerstate, expected)
def kill_agent(self, timeout=90, interval=2):
    """SSH to the host and kill the cloudstack-agent processes, retrying until it succeeds.

    Args:
        timeout: maximum number of seconds to keep retrying (default 90).
        interval: seconds to wait after a failed attempt before retrying.

    Fails the test with a descriptive message when no attempt succeeded.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                      passwd=self.hostConfig["password"]).execute \
                ("kill $(ps aux | grep 'cloudstack-agent' | awk '{print $2}')")
            return
        except Exception:
            print("Cannot ssh into: " + self.host.ipaddress)
            # Back off before the next attempt instead of retrying immediately
            time.sleep(interval)
    self.fail("Could not ssh into %s to kill the agent within %s seconds"
              % (self.host.ipaddress, timeout))
def set_host_to_alert(self):
    """Set the host's status column to 'Alert' directly in the database, keyed by host uuid."""
    query = "update host set host.status = 'Alert' where host.uuid = '%s'" % self.host.id
    self.dbclient.execute(query)
def check_host_transitioned_to_degraded(self, timeout=120, interval=2):
    """Poll the host until its HA state is 'Degraded'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Degraded":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Degraded' within %s seconds" % timeout)
def wait_util_host_is_fencing(self, timeout=120, interval=2):
    """Poll the host until its HA state is 'Fencing'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Fencing":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Fencing' within %s seconds" % timeout)
def check_host_transitioned_to_suspect(self, timeout=120, interval=2):
    """Poll the host until its HA state is 'Suspect'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Suspect":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Suspect' within %s seconds" % timeout)
def check_host_transitioned_to_checking(self, timeout=120, interval=2):
    """Poll the host until its HA state is 'Checking'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Checking":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Checking' within %s seconds" % timeout)
def wait_util_host_is_fenced(self, timeout=120, interval=2):
    """Poll the host until its HA state is 'Fenced'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Fenced":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Fenced' within %s seconds" % timeout)
def wait_util_host_is_up(self, timeout=120, interval=2):
    """Poll the host until its ``state`` is 'Up'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 120).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().state == "Up":
            return
        time.sleep(interval)
    self.fail("Host state did not become 'Up' within %s seconds" % timeout)
def stop_agent(self):
    """SSH to the host with the configured credentials and stop the cloudstack-agent service."""
    ssh = SshClient(
        self.host.ipaddress,
        port=22,
        user=self.hostConfig["username"],
        passwd=self.hostConfig["password"]
    )
    ssh.execute("service cloudstack-agent stop")
def start_agent(self):
    """Restart the agent over SSH, then wait for the host's HA state to return to 'Available'."""
    restart = self.ssh_and_restart_agent
    wait_available = self.check_host_transition_to_available
    restart()
    wait_available()
def ssh_and_restart_agent(self, timeout=90, interval=2):
    """SSH to the host and restart the cloudstack-agent service, retrying until it succeeds.

    Args:
        timeout: maximum number of seconds to keep retrying (default 90).
        interval: seconds to wait after a failed attempt before retrying.

    Fails the test with a descriptive message when no attempt succeeded.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        try:
            SshClient(self.host.ipaddress, port=22, user=self.hostConfig["username"],
                      passwd=self.hostConfig["password"]).execute \
                ("service cloudstack-agent restart")
            return
        except Exception:
            print("Cannot ssh into: " + self.host.ipaddress)
            # Back off before the next attempt instead of retrying immediately
            time.sleep(interval)
    self.fail("Could not ssh into %s to restart the agent within %s seconds"
              % (self.host.ipaddress, timeout))
def check_host_transition_to_available(self, timeout=90, interval=2):
    """Poll the host until its HA state is 'Available'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 90).
        interval: seconds to sleep between polls, so the API is not hammered
            in a tight loop.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        if self.getHost().hostha.hastate == "Available":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Available' within %s seconds" % timeout)
def wait_util_host_is_recovered(self, timeout=180, interval=2):
    """Poll the host until its HA state is 'Recovered'; fail the test on timeout.

    Args:
        timeout: maximum number of seconds to keep polling (default 180).
        interval: seconds to sleep between polls.
    """
    deadline = time.time() + timeout
    while time.time() < deadline:
        # Exact comparison: `in` on a string is a substring test and would
        # also accept '' or a partial state name.
        if self.getHost().hostha.hastate == "Recovered":
            return
        time.sleep(interval)
    self.fail("Host HA state did not become 'Recovered' within %s seconds" % timeout)
def reset_host(self):
    """SSH to the host with the configured credentials and run `reboot`."""
    ssh = SshClient(
        self.host.ipaddress,
        port=22,
        user=self.hostConfig["username"],
        passwd=self.hostConfig["password"]
    )
    ssh.execute("reboot")
def deploy_vm(self):
    """Create a VM on the test host from the prepared offering/template and register it for cleanup."""
    create_args = dict(
        services=self.services["virtual_machine"],
        serviceofferingid=self.service_offering.id,
        templateid=self.template.id,
        zoneid=self.zone.id,
        hostid=self.host.id,
        method="POST"
    )
    instance = VirtualMachine.create(self.apiClient, **create_args)
    self.cleanup.append(instance)
def update_configuration(self, name, value):
    """Set configuration ``name`` to ``value`` for the test cluster via updateConfiguration."""
    config_cmd = updateConfiguration.updateConfigurationCmd()
    config_cmd.name = name
    config_cmd.value = value
    config_cmd.clusterid = self.cluster_id
    self.apiClient.updateConfiguration(config_cmd)
@attr(tags = ["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_ha_stop_agent_host_is_degraded(self):
    """
    Tests HA state turns Degraded when agent is stopped
    """
    def run_or_restore(step, restore):
        # Run a step; if it raises, run the restore action and re-raise wrapped.
        try:
            step()
        except Exception as e:
            restore()
            raise Exception("Warning: Exception during test execution : %s" % e)

    self.deploy_vm()

    # Enable OOBM and HA for the host and wait until its HA state is Available
    self.get_host_in_available_state()

    # Stop the agent service on the KVM host over SSH
    self.stop_agent()

    # The host must reach Degraded; if it does not, restart the agent before failing
    run_or_restore(self.check_host_transitioned_to_degraded, self.start_agent)

    # Bring the agent back
    self.start_agent()
#@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
# NOTE(review): the @attr decorator above is commented out, so tag-based test
# selection will not pick this test up -- confirm this is intentional.
def test_ha_recovering_start_agent_host_is_available(self):
    """
    Tests HA state turns Recovered when agent is stopped and host is reset
    """
    def run_or_restore(step, restore):
        # Run a step; if it raises, run the restore action and re-raise wrapped.
        try:
            step()
        except Exception as e:
            restore()
            raise Exception("Warning: Exception during test execution : %s" % e)

    self.deploy_vm()

    # Enable OOBM and HA for the host and wait until its HA state is Available
    self.get_host_in_available_state()

    # Kill the agent processes on the KVM host over SSH
    self.kill_agent()

    # The host must reach Degraded; if it does not, restart the agent before failing
    run_or_restore(self.check_host_transitioned_to_degraded, self.start_agent)

    # Reboot the host to emulate a shut down; it should reach Recovered while coming back
    self.reset_host()

    # The host must reach Recovered; if it does not, restart the agent before failing
    run_or_restore(self.wait_util_host_is_recovered, self.start_agent)

    # Restart the agent and wait for the Available state
    self.start_agent()
#@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
# NOTE(review): the @attr decorator above is commented out, so tag-based test
# selection will not pick this test up -- confirm this is intentional.
def test_ha_fencing_host(self):
    """
    Tests HA state turns Recovered when agent is stopped and host is reset,
    then configure incorrect OOBM configuration, so that Recover command would fail
    and host would transition into Fenced state.
    """
    def run_or_restore(step, restore):
        # Run a step; if it raises, run the restore action and re-raise wrapped.
        try:
            step()
        except Exception as e:
            restore()
            raise Exception("Warning: Exception during test execution : %s" % e)

    self.deploy_vm()

    # Enable OOBM and HA for the host and wait until its HA state is Available
    self.get_host_in_available_state()

    # Kill the agent processes on the KVM host over SSH
    self.kill_agent()

    # The host must reach Degraded; if it does not, restart the agent before failing
    run_or_restore(self.check_host_transitioned_to_degraded, self.start_agent)

    # Point OOBM at an invalid address so the recover operations fail
    broken_oobm = self.getOobmConfigCmd()
    broken_oobm.address = "1.1.1.1"
    self.apiClient.configureOutOfBandManagement(broken_oobm)

    # Reboot the host to emulate a shut down, then kill the agent again
    self.reset_host()
    self.kill_agent()

    # The host must reach Fencing; if it does not, restart the agent before failing
    run_or_restore(self.wait_util_host_is_fencing, self.ssh_and_restart_agent)

    # Restore the correct OOBM configuration so the fence operation can succeed
    self.apiClient.configureOutOfBandManagement(self.getOobmConfigCmd())

    # The host must reach Fenced; if it does not, restart the agent before failing
    run_or_restore(self.wait_util_host_is_fenced, self.ssh_and_restart_agent)

    # Restart the agent over SSH
    self.ssh_and_restart_agent()

    # Waits until state is Up so that cleanup would be successful
    self.wait_util_host_is_up()
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="true")
def test_ha_kill_agent_host_is_degraded(self):
"""
Tests HA state turns Suspect/Checking when some activity/health checks fail
Configures HA, logs into a host and restarts the agent service
Then it confirms the ha state jumps through Suspect -> Checking -> Available
NOTE(review): despite the method name, this test restarts the agent (it does
not kill it) and never checks for the Degraded state.
"""
# Configure and Enable OOBM, Set HA Provider and Enable HA. At the end checks if HA State is Available
self.get_host_in_available_state()
# Restart the agent on the KVM host
self.ssh_and_restart_agent()
# Wait for the Suspect check; on failure start the agent before re-raising
try:
self.check_host_transitioned_to_suspect()
except Exception as e:
self.start_agent()
raise Exception("Warning: Exception during test execution : %s" % e)
# Wait for the Checking check; on failure start the agent before re-raising
try:
self.check_host_transitioned_to_checking()
except Exception as e:
self.start_agent()
raise Exception("Warning: Exception during test execution : %s" % e)
# Finally the host is expected to go back to Available
self.check_host_transition_to_available()

View File

@ -0,0 +1,656 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import marvin
from marvin.cloudstackTestCase import *
from marvin.cloudstackAPI import *
from marvin.lib.utils import *
from marvin.lib.base import *
from marvin.lib.common import *
from nose.plugins.attrib import attr
import random
from ipmisim.ipmisim import IpmiServerContext, IpmiServer, ThreadedIpmiServer
import random
import socket
import sys
import thread
import time
class TestHostHA(cloudstackTestCase):
""" Test cases for host HA using Simulator host(s)
"""
def setUp(self):
    """Prepare clients, a fake management-server id, a free port and a host."""
    client = self.testClient
    self.apiclient = client.getApiClient()
    self.hypervisor = client.getHypervisorInfo()
    self.dbclient = client.getDbConnection()
    self.services = client.getParsedTestDataConfig()
    self.mgtSvrDetails = self.config.__dict__["mgtSvr"][0].__dict__

    # Random id used to impersonate an additional management server
    base = random.randint(10000, 99999)
    factor = random.randint(10, 20)
    self.fakeMsId = base * factor
    self.host = None

    # Cleanup any existing configs
    self.dbclient.execute("delete from ha_config where resource_type='Host'")

    # Let the OS pick a free port for ipmisim, remember it and release the socket
    probe = socket.socket()
    probe.bind(('', 0))
    _, self.serverPort = probe.getsockname()[:2]
    probe.close()

    # Get a host to run tests against
    self.host = self.getHost()

    self.cleanup = []
def tearDown(self):
    """Remove the fake management server, HA settings and created resources."""
    try:
        fakeRunId = self.getFakeMsRunId()
        statements = (
            "delete from mshost_peer where peer_runid=%s" % fakeRunId,
            "delete from mshost where runid=%s" % fakeRunId,
            "delete from cluster_details where name='resourceHAEnabled'",
            "delete from data_center_details where name='resourceHAEnabled'",
            "delete from ha_config where resource_type='Host'",
            "update host set resource_state='Enabled' where type='Routing' and resource_state='Maintenance'",
        )
        for sql in statements:
            self.dbclient.execute(sql)
        cleanup_resources(self.apiclient, self.cleanup)
    except Exception as e:
        raise Exception("Warning: Exception during cleanup : %s" % e)
def getFakeMsId(self):
    """Return the id generated in setUp for the fake management server."""
    fakeId = self.fakeMsId
    return fakeId
def getFakeMsRunId(self):
    """Return the run id of the fake management server (its id times 1000)."""
    return 1000 * self.fakeMsId
def getHost(self, hostId=None):
    """
    Return the host the tests run against.

    Without ``hostId`` the cached ``self.host`` is returned when one is set.
    Otherwise the enabled Simulator routing hosts are listed (filtered by
    ``hostId`` when given); the first result is cached in ``self.host`` and
    returned. When nothing is found the test is skipped.
    """
    if self.host and hostId is None:
        return self.host

    response = list_hosts(
        self.apiclient,
        type='Routing',
        hypervisor='Simulator',
        resourcestate='Enabled',
        id=hostId
    )
    # An empty list and None are both "no hosts"
    if response:
        self.host = response[0]
        return self.host
    # skipTest() raises unittest.SkipTest itself, so no explicit `raise` is needed
    self.skipTest("No simulator hosts found, skipping host-ha test")
def getHostHaConfigCmd(self, provider='simulatorhaprovider'):
    """Build a configureHAForHost command for the test host and given provider."""
    configCmd = configureHAForHost.configureHAForHostCmd()
    configCmd.provider = provider
    targetHost = self.getHost()
    configCmd.hostid = targetHost.id
    return configCmd
def getHostHaEnableCmd(self):
    """Build an enableHAForHost command addressed to the test host."""
    enableCmd = enableHAForHost.enableHAForHostCmd()
    targetHost = self.getHost()
    enableCmd.hostid = targetHost.id
    return enableCmd
def getHostHaDisableCmd(self):
    """Build a disableHAForHost command addressed to the test host."""
    disableCmd = disableHAForHost.disableHAForHostCmd()
    targetHost = self.getHost()
    disableCmd.hostid = targetHost.id
    return disableCmd
def configureAndEnableHostHa(self, initialize=True):
    """
    Configure the HA provider for the test host and enable HA on it.

    When ``initialize`` is true the simulator provider is additionally set to
    health=True, activity=True, recover=True, fence=False.
    """
    configCmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(configCmd)

    enableCmd = self.getHostHaEnableCmd()
    enabled = self.apiclient.enableHAForHost(enableCmd)
    self.assertEqual(enabled.haenable, True)

    if not initialize:
        return
    self.configureSimulatorHAProviderState(True, True, True, False)
def configureAndDisableHostHa(self, hostId):
    """Configure the HA provider for the test host, then disable HA on ``hostId``."""
    configCmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(configCmd)

    disableCmd = self.getHostHaDisableCmd()
    disableCmd.hostid = hostId
    disabled = self.apiclient.disableHAForHost(disableCmd)

    # The response must refer to the requested host and report HA as off
    self.assertEqual(disabled.hostid, disableCmd.hostid)
    self.assertEqual(disabled.haenable, False)
def enableHostHa(self, hostId):
    """Enable HA on ``hostId`` and verify the response reports it as enabled."""
    enableCmd = self.getHostHaEnableCmd()
    enableCmd.hostid = hostId
    enabled = self.apiclient.enableHAForHost(enableCmd)

    # The response must refer to the requested host and report HA as on
    self.assertEqual(enabled.hostid, enableCmd.hostid)
    self.assertEqual(enabled.haenable, True)
def configureSimulatorHAProviderState(self, health, activity, recover, fence):
    """
    Set the simulator HA provider flags (health, activity, recover, fence)
    for the test host and assert that the API reports success.
    """
    stateCmd = configureSimulatorHAProviderState.configureSimulatorHAProviderStateCmd()
    stateCmd.hostid = self.getHost().id
    for flagName, flagValue in (('health', health),
                                ('activity', activity),
                                ('recover', recover),
                                ('fence', fence)):
        setattr(stateCmd, flagName, flagValue)
    result = self.apiclient.configureSimulatorHAProviderState(stateCmd)
    self.assertEqual(result.success, 'true')
def getSimulatorHAStateTransitions(self, hostId):
    """Return the simulator HA state transitions listed by the API for ``hostId``."""
    listCmd = listSimulatorHAStateTransitions.listSimulatorHAStateTransitionsCmd()
    listCmd.hostid = hostId
    transitions = self.apiclient.listSimulatorHAStateTransitions(listCmd)
    return transitions
def checkSyncToState(self, state, interval=5000):
    """
    Poll until the test host's ``hostha.hastate`` equals ``state``.

    ``interval`` is given in milliseconds; the poll period is
    ``1 + interval // 1000`` seconds and up to 50 polls are made. The test
    fails when the state is not reached, and the state is asserted once more
    after polling.
    """
    def checkForStateSync(expectedState):
        response = self.getHost(hostId=self.getHost().id).hostha
        return response.hastate == expectedState, None

    # Floor division keeps the period an int under both `/` semantics
    # (classic division on Python 2, true division on Python 3)
    sync_interval = 1 + int(interval) // 1000
    res, _ = wait_until(sync_interval, 50, checkForStateSync, state)
    if not res:
        self.fail("Failed to get host.hastate synced to expected state:" + state)
    response = self.getHost(hostId=self.getHost().id).hostha
    self.assertEqual(response.hastate, state)
def get_non_configured_ha_host(self):
    """
    Return the first routing host that has no HA provider configured.

    Every listed host is inspected; ``None`` is returned only when no host
    qualifies or when the listing yields nothing.
    """
    hosts = list_hosts(
        self.apiclient,
        type='Routing'
    )
    # `or []` guards against a None response from the listing
    for host in hosts or []:
        if host.haprovider is None:
            return host
    return None
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_hostha_enable_feature_without_setting_provider(self):
    """
        Enabling HA on a host that has no provider configured must raise
    """
    unconfigured = self.get_non_configured_ha_host()
    if unconfigured is None:
        self.skipTest("There is no non configured hosts. Skipping test.")

    enableCmd = self.getHostHaEnableCmd()
    enableCmd.hostid = unconfigured.id

    rejected = False
    try:
        self.apiclient.enableHAForHost(enableCmd)
    except Exception:
        rejected = True
    if not rejected:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_list_providers(self):
    """
        The first HA provider listed per hypervisor must be the expected one
    """
    listCmd = listHostHAProviders.listHostHAProvidersCmd()

    expectations = (
        ('Simulator', 'SimulatorHAProvider'),
        ('KVM', 'KVMHAProvider'),
    )
    # The same command object is reused; only the hypervisor filter changes
    for hypervisor, expectedProvider in expectations:
        listCmd.hypervisor = hypervisor
        first = self.apiclient.listHostHAProviders(listCmd)[0]
        self.assertEqual(first.haprovider, expectedProvider)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_hostha_configure_invalid_provider(self):
    """
        Configuring host-ha with an unknown provider name must raise
    """
    configCmd = self.getHostHaConfigCmd()
    configCmd.provider = 'randomDriverThatDoesNotExist'

    rejected = False
    try:
        self.apiclient.configureHAForHost(configCmd)
    except Exception:
        rejected = True
    if not rejected:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_hostha_configure_default_driver(self):
    """
        Configuring host-ha with the default provider echoes host id and provider
    """
    configCmd = self.getHostHaConfigCmd()
    configured = self.apiclient.configureHAForHost(configCmd)

    self.assertEqual(configured.hostid, configCmd.hostid)
    expectedProvider = configCmd.provider.lower()
    self.assertEqual(configured.haprovider, expectedProvider)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_enable_feature_invalid(self):
    """
        Enable-HA calls with invalid or missing ids must raise at host,
        cluster and zone level
    """

    def expectFailure(action):
        # The action has to raise; otherwise the test fails
        raised = False
        try:
            action()
        except Exception:
            raised = True
        if not raised:
            self.fail("Expected an exception to be thrown, failing")

    hostCmd = self.getHostHaEnableCmd()
    hostCmd.hostid = -1
    expectFailure(lambda: self.apiclient.enableHAForHost(hostCmd))

    # Cluster and zone commands are sent without any id set
    expectFailure(lambda: self.apiclient.enableHAForCluster(
        enableHAForCluster.enableHAForClusterCmd()))
    expectFailure(lambda: self.apiclient.enableHAForZone(
        enableHAForZone.enableHAForZoneCmd()))
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_disable_feature_invalid(self):
    """
        Disable-HA calls with invalid or missing ids must raise at host,
        cluster and zone level
    """

    def expectFailure(action):
        # The action has to raise; otherwise the test fails
        raised = False
        try:
            action()
        except Exception:
            raised = True
        if not raised:
            self.fail("Expected an exception to be thrown, failing")

    hostCmd = self.getHostHaDisableCmd()
    hostCmd.hostid = -1
    expectFailure(lambda: self.apiclient.disableHAForHost(hostCmd))

    # Cluster and zone commands are sent without any id set
    expectFailure(lambda: self.apiclient.disableHAForCluster(
        disableHAForCluster.disableHAForClusterCmd()))
    expectFailure(lambda: self.apiclient.disableHAForZone(
        disableHAForZone.disableHAForZoneCmd()))
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_hostha_enable_feature_valid(self):
    """
        Enabling host-ha on a configured host reports the host as HA-enabled
    """
    configCmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(configCmd)

    enableCmd = self.getHostHaEnableCmd()
    enabled = self.apiclient.enableHAForHost(enableCmd)

    self.assertEqual(enabled.hostid, enableCmd.hostid)
    self.assertEqual(enabled.haenable, True)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_hostha_disable_feature_valid(self):
    """
        Disabling host-ha on a configured host reports it as disabled and
        the host's HA state reads 'Disabled'
    """
    configCmd = self.getHostHaConfigCmd()
    self.apiclient.configureHAForHost(configCmd)

    disableCmd = self.getHostHaDisableCmd()
    disabled = self.apiclient.disableHAForHost(disableCmd)
    self.assertEqual(disabled.hostid, disableCmd.hostid)
    self.assertEqual(disabled.haenable, False)

    # Re-read the host and look at its HA details
    haDetails = self.getHost(hostId=disableCmd.hostid).hostha
    self.assertEqual(haDetails.hastate, 'Disabled')
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_enabledisable_across_clusterzones(self):
    """
        Disables and re-enables HA at host, cluster and zone level
        (Zone > Cluster > Host) and checks the resulting host HA state
    """
    self.configureAndEnableHostHa()

    simHost = self.getHost()
    self.checkSyncToState('Available')
    haDetails = self.getHost(hostId=simHost.id).hostha
    self.assertTrue(haDetails.hastate == 'Available')

    # Disable: host first, then cluster, then zone
    hostOff = disableHAForHost.disableHAForHostCmd()
    hostOff.hostid = simHost.id
    self.apiclient.disableHAForHost(hostOff)

    clusterOff = disableHAForCluster.disableHAForClusterCmd()
    clusterOff.clusterid = simHost.clusterid
    self.apiclient.disableHAForCluster(clusterOff)

    zoneOff = disableHAForZone.disableHAForZoneCmd()
    zoneOff.zoneid = simHost.zoneid
    self.apiclient.disableHAForZone(zoneOff)

    # The host must now report a disabled HA state
    haDetails = self.getHost(hostId=simHost.id).hostha
    self.assertTrue(haDetails.hastate == 'Disabled')

    # Tamper with the stored state and expect it to read Disabled again
    self.dbclient.execute("update ha_config set ha_state='Available' where enabled='1' and resource_type='Host'")
    self.checkSyncToState('Disabled')

    # Enable: zone first, then cluster, then host
    zoneOn = enableHAForZone.enableHAForZoneCmd()
    zoneOn.zoneid = simHost.zoneid
    self.apiclient.enableHAForZone(zoneOn)

    clusterOn = enableHAForCluster.enableHAForClusterCmd()
    clusterOn.clusterid = simHost.clusterid
    self.apiclient.enableHAForCluster(clusterOn)

    hostOn = enableHAForHost.enableHAForHostCmd()
    hostOn.hostid = simHost.id
    self.apiclient.enableHAForHost(hostOn)

    # Check state sync
    self.checkSyncToState('Available')
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_multiple_mgmt_server_ownership(self):
"""
Tests ha resource ownership expiry across multi-mgmt server
Inserts a fake management server row, hands ownership of the host's
ha_config rows to it, then polls until ownership moves away from the
fake id and asserts the new owner is one of the 'Up' management servers
recorded at the start.
"""
self.configureAndEnableHostHa()
cloudstackVersion = Configurations.listCapabilities(self.apiclient).cloudstackversion
# Collect the msids of the management servers currently 'Up' for this version
currentMsHosts = []
mshosts = self.dbclient.execute(
"select msid from mshost where version='%s' and removed is NULL and state='Up'" % (cloudstackVersion))
if len(mshosts) > 0:
# NOTE(review): map() returns a list on Python 2 but a one-shot iterator on Python 3
currentMsHosts = map(lambda row: row[0], mshosts)
# Inject fake ms host
self.dbclient.execute(
"insert into mshost (msid,runid,name,state,version,service_ip,service_port,last_update) values (%s,%s,'ha-marvin-fakebox', 'Down', '%s', '127.0.0.1', '22', NOW())" % (
self.getFakeMsId(), self.getFakeMsRunId(), cloudstackVersion))
# Pass ownership to the fake ms id
self.dbclient.execute(
"update ha_config set mgmt_server_id=%d where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'" % self.getFakeMsId())
# Both settings feed the polling period computed below
pingInterval = float(list_configurations(
self.apiclient,
name='ping.interval'
)[0].value)
pingTimeout = float(list_configurations(
self.apiclient,
name='ping.timeout'
)[0].value)
# Poll callback: once a mshost_peer row exists for the fake run id, mark the
# fake server as removed and its peer state as Down
def removeFakeMgmtServer(fakeMsRunId):
rows = self.dbclient.execute("select * from mshost_peer where peer_runid=%s" % fakeMsRunId)
if len(rows) > 0:
self.debug("Mgmt server is now trying to contact the fake mgmt server")
self.dbclient.execute("update mshost set removed=now() where runid=%s" % fakeMsRunId)
self.dbclient.execute("update mshost_peer set peer_state='Down' where peer_runid=%s" % fakeMsRunId)
return True, None
return False, None
# Poll callback: succeeds once the first matching ha_config row is no longer
# owned by the fake management server id
def checkHaOwnershipExpiry(fakeMsId):
rows = self.dbclient.execute(
"select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'")
if len(rows) > 0 and rows[0][0] != fakeMsId:
self.debug("HA resource ownership expired as node was detected to be gone")
return True, None
return False, None
# Polling period derived from ping.interval and ping.timeout
retry_interval = 1 + (pingInterval * pingTimeout / 10)
res, _ = wait_until(retry_interval, 20, removeFakeMgmtServer, self.getFakeMsRunId())
if not res:
self.fail("Management server failed to turn down or remove fake mgmt server")
res, _ = wait_until(retry_interval, 100, checkHaOwnershipExpiry, self.getFakeMsId())
if not res:
self.fail("Management server failed to expire ownership of fenced peer")
self.debug("Testing ha background sync should claim new ownership")
self.checkSyncToState('Available')
# The new owner must be one of the management servers recorded at the start
result = self.dbclient.execute(
"select mgmt_server_id from ha_config where resource_type='Host' and enabled=1 and provider='simulatorhaprovider'")
newOwnerId = result[0][0]
self.assertTrue(newOwnerId in currentMsHosts)
def checkFSMTransition(self, transition, event, haState, prevHaState, hasActiviyCounter, hasRecoveryCounter):
    """
    Assert that ``transition`` carries the given event, HA state and previous
    HA state, and that its activity/recovery counters are positive when the
    matching flag is set and exactly 0 otherwise.
    """
    self.assertEqual(transition.event, event)
    self.assertEqual(transition.hastate, haState)
    self.assertEqual(transition.prevhastate, prevHaState)

    # Same rule for both counters, activity first
    counterChecks = (
        (hasActiviyCounter, 'activitycounter'),
        (hasRecoveryCounter, 'recoverycounter'),
    )
    for expectPositive, counterName in counterChecks:
        counter = getattr(transition, counterName)
        if expectPositive:
            self.assertTrue(counter > 0)
        else:
            self.assertEqual(counter, 0)
def findFSMTransitionToState(self, state, host):
    """
    Look up the first recorded transition of ``host`` whose ``hastate``
    equals ``state``.

    Returns ``(found, (previous, match, next))``: ``previous`` and ``next``
    are the neighbouring transitions (``None`` at either end). When nothing
    matches, ``match`` and ``next`` are ``None`` and ``previous`` is the last
    transition seen; with no transitions at all every entry is ``None``.
    """
    transitions = self.getSimulatorHAStateTransitions(host.id)
    if not transitions:
        return False, (None, None, None)

    lastIndex = len(transitions) - 1
    for position, candidate in enumerate(transitions):
        if candidate.hastate != state:
            continue
        before = transitions[position - 1] if position > 0 else None
        after = transitions[position + 1] if position < lastIndex else None
        return True, (before, candidate, after,)

    # No match: the scan ended on the last transition
    return False, (transitions[lastIndex], None, None,)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_verify_fsm_available(self):
    """
        Verifies the FSM transition into 'available' for a healthy host
        (simulated health checks pass)
    """
    simHost = self.getHost()

    # Start from a disabled host, make the simulated checks pass, then enable HA
    self.configureAndDisableHostHa(simHost.id)
    self.configureSimulatorHAProviderState(True, True, True, False)
    self.configureAndEnableHostHa(False)

    found, (_, availableT, _) = wait_until(2, 50, self.findFSMTransitionToState, 'available', simHost)
    if not found:
        self.fail("FSM did not transition to available state")

    self.checkFSMTransition(availableT, 'enabled', 'available', 'disabled', False, False)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_verify_fsm_degraded(self):
    """
        Verifies the FSM transitions leading to the degraded state
        Simulated health checks fail while activity checks pass
        Expected cycle:
        Available->Suspect<->Checking->Degraded->Available
    """
    simHost = self.getHost()
    self.configureSimulatorHAProviderState(False, True, True, False)
    self.configureAndEnableHostHa(False)

    # First the failing health check must move the host to suspect
    found, (_, suspectT, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', simHost)
    if not found:
        self.fail("FSM did not transition to suspect state")
    self.checkFSMTransition(suspectT, 'healthcheckfailed', 'suspect', 'available', False, False)

    # Then the host must reach degraded; neighbours are checked when present
    found, (beforeT, degradedT, afterT) = wait_until(2, 50, self.findFSMTransitionToState, 'degraded', simHost)
    if not found:
        self.fail("FSM did not transition to degraded state")

    if beforeT:
        self.checkFSMTransition(beforeT, 'performactivitycheck', 'checking', 'suspect', True, False)
    self.checkFSMTransition(degradedT, 'activitycheckfailureunderthresholdratio', 'degraded', 'checking', True, False)
    if afterT:
        self.checkFSMTransition(afterT, 'periodicrecheckresourceactivity', 'suspect', 'degraded', False, False)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_verify_fsm_recovering(self):
    """
        Verifies the FSM transitions leading to recovering
        Simulated health and activity checks both fail
        Expected cycle:
        Available->Suspect<->Checking->Recovering->Recovered<-retry-loop->->Fencing
    """
    simHost = self.getHost()
    self.configureSimulatorHAProviderState(False, False, True, False)
    self.configureAndEnableHostHa(False)

    # First the failing health check must move the host to suspect
    found, (_, suspectT, _) = wait_until(2, 50, self.findFSMTransitionToState, 'suspect', simHost)
    if not found:
        self.fail("FSM did not transition to suspect state")
    self.checkFSMTransition(suspectT, 'healthcheckfailed', 'suspect', 'available', False, False)

    # Then the host must reach recovering; neighbours are checked when present
    found, (beforeT, recoveringT, afterT) = wait_until(2, 100, self.findFSMTransitionToState, 'recovering', simHost)
    if not found:
        self.fail("FSM did not transition to recovering state")

    if beforeT:
        self.checkFSMTransition(beforeT, 'performactivitycheck', 'checking', 'suspect', True, False)
    self.checkFSMTransition(recoveringT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, False)
    if afterT:
        self.checkFSMTransition(afterT, 'recovered', 'recovered', 'recovering', False, True)

    # Finally the host must reach fencing once recovery attempts are exceeded
    found, (beforeT, fencingT, afterT) = wait_until(2, 100, self.findFSMTransitionToState, 'fencing', simHost)
    if not found:
        self.fail("FSM did not transition to fencing state")

    if beforeT:
        self.checkFSMTransition(
            beforeT, 'activitycheckfailureoverthresholdratio', 'recovering', 'checking', True, True)
    self.checkFSMTransition(fencingT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_ha_verify_fsm_fenced(self):
    """
        Verifies the FSM transitions for failures leading to the fenced state
        Expected cycle:
        Available->Suspect<->Checking->Recovering<-fail recovery->->Fencing->Fenced
    """
    simHost = self.getHost()

    # Start from a disabled host with failing checks, failing recovery and working fence
    self.configureAndDisableHostHa(simHost.id)
    self.configureSimulatorHAProviderState(False, False, False, True)
    self.configureAndEnableHostHa(False)

    # The host must end up fenced, coming from fencing
    found, (fencingT, fencedT, _) = wait_until(2, 100, self.findFSMTransitionToState, 'fenced', simHost)
    if not found:
        self.fail("FSM did not transition to fenced state")

    self.checkFSMTransition(fencingT, 'recoveryoperationthresholdexceeded', 'fencing', 'recovering', False, True)
    self.checkFSMTransition(fencedT, 'fenced', 'fenced', 'fencing', False, False)

    # TODO: add test case for HA vm reboot checks

    # Simulate manual recovery of host and cancel maintenance mode
    self.configureSimulatorHAProviderState(True, True, True, False)
    cancelMaintenance = cancelHostMaintenance.cancelHostMaintenanceCmd()
    cancelMaintenance.id = simHost.id
    self.apiclient.cancelHostMaintenance(cancelMaintenance)

    # After the manual recovery the host must become available again
    found, (ineligibleT, availableT, _) = wait_until(2, 100, self.findFSMTransitionToState, 'available', simHost)
    if not found:
        self.fail("FSM did not transition to available state")

    self.checkFSMTransition(ineligibleT, 'healthcheckpassed', 'ineligible', 'fenced', False, False)
    self.checkFSMTransition(availableT, 'eligible', 'available', 'ineligible', False, False)
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_configure_ha_provider_invalid(self):
    """
        Tests configure HA Provider with invalid provider options:
        configuring the provider of the *other* hypervisor on an HA-enabled
        host must raise
    """

    # Enable ha for host
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    cmd = self.getHostHaEnableCmd()
    response = self.apiclient.enableHAForHost(cmd)
    self.assertEqual(response.hostid, cmd.hostid)
    self.assertEqual(response.haenable, True)

    host = self.getHost(response.hostid)

    # Setup wrong configuration for the host: pick the provider that does
    # not belong to the host's hypervisor. Compare with `==`; `in` on a
    # string is a substring test and would also match '' or 'sim'.
    conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
    hypervisor = host.hypervisor.lower()
    if hypervisor == "simulator":
        conf_ha_cmd.provider = "kvmhaprovider"
    elif hypervisor == "kvm":
        conf_ha_cmd.provider = "simulatorhaprovider"

    conf_ha_cmd.hostid = cmd.hostid

    # Call the configure HA provider API with not supported provider for HA
    try:
        self.apiclient.configureHAForHost(conf_ha_cmd)
    except Exception:
        pass
    else:
        self.fail("Expected an exception to be thrown, failing")
@attr(tags=["devcloud", "advanced", "advancedns", "smoke", "basic", "sg"], required_hardware="false")
def test_configure_ha_provider_valid(self):
    """
        Tests configure HA Provider with valid provider options:
        configuring the provider that matches the host's hypervisor must
        succeed and be echoed in the response
    """

    # Enable ha for host
    self.apiclient.configureHAForHost(self.getHostHaConfigCmd())
    cmd = self.getHostHaEnableCmd()
    response = self.apiclient.enableHAForHost(cmd)
    self.assertEqual(response.hostid, cmd.hostid)
    self.assertEqual(response.haenable, True)

    host = self.getHost(response.hostid)

    # Pick the provider that belongs to the host's hypervisor. Compare with
    # `==`; `in` on a string is a substring test and would also match ''.
    conf_ha_cmd = configureHAForHost.configureHAForHostCmd()
    hypervisor = host.hypervisor.lower()
    if hypervisor == "kvm":
        conf_ha_cmd.provider = "kvmhaprovider"
    elif hypervisor == "simulator":
        conf_ha_cmd.provider = "simulatorhaprovider"

    conf_ha_cmd.hostid = cmd.hostid

    # Call the configure HA provider API with the supported provider
    response = self.apiclient.configureHAForHost(conf_ha_cmd)

    # Check the response contains the set provider and hostID
    self.assertEqual(response.haprovider, conf_ha_cmd.provider)
    self.assertEqual(response.hostid, conf_ha_cmd.hostid)

View File

@ -12756,6 +12756,38 @@ div.ui-dialog div.autoscaler div.field-group div.form-container form div.form-it
background-position: -137px -614px;
}
/* Host-HA action icons: each rule sets the background-position offset of the
   .icon element; the matching :hover rule sets the offset used while hovered. */
/* blankHAForHost: normal and hover offsets are identical */
.blankHAForHost .icon {
background-position: -266px -31px;
}
.blankHAForHost:hover .icon {
background-position: -266px -31px;
}
/* configureHAForHost */
.configureHAForHost .icon {
background-position: -270px -148px;
}
.configureHAForHost:hover .icon {
background-position: -270px -728px;
}
/* enableHA */
.enableHA .icon {
background-position: -265px -93px;
}
.enableHA:hover .icon {
background-position: -265px -673px;
}
/* disableHA */
.disableHA .icon {
background-position: -265px -120px;
}
.disableHA:hover .icon {
background-position: -265px -700px;
}
/* blankOutOfBandManagement: same offset as .blankHAForHost */
.blankOutOfBandManagement .icon {
background-position: -266px -31px;
}

Some files were not shown because too many files have changed in this diff Show More