CLOUDSTACK-9114: Reduce VR downtime during network restart (#2508)

This introduces a rolling restart of VRs (virtual routers) when isolated
or VPC networks are restarted with the cleanup option. A "make redundant"
option is now shown for isolated networks in the UI.

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Rohit Yadav 2018-05-11 12:48:07 +05:30 committed by GitHub
parent bd89760108
commit a77ed56b86
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
26 changed files with 391 additions and 128 deletions

View File

@ -351,6 +351,8 @@ public interface Network extends ControlledEntity, StateObject<Network.State>, I
boolean isRedundant();
boolean isRollingRestart();
long getRelated();
URI getBroadcastUri();

View File

@ -155,6 +155,11 @@ public class NetworkProfile implements Network {
return this.isRedundant;
}
/**
 * Rolling-restart flag required by the {@link Network} interface.
 * This implementation always returns {@code false}.
 */
@Override
public boolean isRollingRestart() {
return false;
}
@Override
public String getName() {
return name;

View File

@ -70,7 +70,7 @@ public interface NetworkService {
boolean deleteNetwork(long networkId, boolean forced);
boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
int getActiveNicsInNetwork(long networkId);

View File

@ -87,4 +87,8 @@ public interface Vpc extends ControlledEntity, Identity, InternalIdentity {
* @return true if VPC spans multiple zones in the region
*/
boolean isRegionLevelVpc();
/**
 * @return the current value of the rolling-restart flag of this VPC
 */
boolean isRollingRestart();
/**
 * Sets the rolling-restart flag of this VPC.
 * @param rollingRestart new value of the flag
 */
void setRollingRestart(boolean rollingRestart);
}

View File

@ -56,7 +56,7 @@ public class ApiConstants {
public static final String CIDR_LIST = "cidrlist";
public static final String DEST_CIDR_LIST = "destcidrlist";
public static final String CLEANUP = "cleanup";
public static final String MAKEREDUNDANTE = "makeredundant";
public static final String MAKEREDUNDANT = "makeredundant";
public static final String CLUSTER_ID = "clusterid";
public static final String CLUSTER_NAME = "clustername";
public static final String CLUSTER_TYPE = "clustertype";
@ -681,6 +681,7 @@ public class ApiConstants {
public static final String REMAININGCAPACITY = "remainingcapacity";
public static final String MAXCAPACITY = "maxcapacity";
public static final String DISTRIBUTED_VPC_ROUTER = "distributedvpcrouter";
public static final String REDUNDANT_ROUTER = "redundantrouter";
public static final String REDUNDANT_VPC_ROUTER = "redundantvpcrouter";
public static final String READ_ONLY = "readonly";
public static final String SUPPORTS_REGION_LEVEL_VPC = "supportsregionLevelvpc";

View File

@ -55,7 +55,10 @@ public class RestartNetworkCmd extends BaseAsyncCmd {
private Long id;
@Parameter(name = ApiConstants.CLEANUP, type = CommandType.BOOLEAN, required = false, description = "If cleanup old network elements")
private Boolean cleanup;
private Boolean cleanup = false;
@Parameter(name = ApiConstants.MAKEREDUNDANT, type = CommandType.BOOLEAN, required = false, description = "Turn the network into a network with redundant routers.", since = "4.11.1")
private Boolean makeRedundant = false;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
@ -71,10 +74,11 @@ public class RestartNetworkCmd extends BaseAsyncCmd {
}
public Boolean getCleanup() {
if (cleanup != null) {
return cleanup;
}
return true;
public Boolean getMakeRedundant() {
return makeRedundant;
}
/////////////////////////////////////////////////////
@ -92,7 +96,7 @@ public class RestartNetworkCmd extends BaseAsyncCmd {
@Override
public void execute() throws ResourceUnavailableException, ResourceAllocationException, ConcurrentOperationException, InsufficientCapacityException {
boolean result = _networkService.restartNetwork(this, getCleanup());
boolean result = _networkService.restartNetwork(this, getCleanup(), getMakeRedundant());
if (result) {
SuccessResponse response = new SuccessResponse(getCommandName());
setResponseObject(response);

View File

@ -49,10 +49,10 @@ public class RestartVPCCmd extends BaseAsyncCmd {
private Long id;
@Parameter(name = ApiConstants.CLEANUP, type = CommandType.BOOLEAN, required = false, description = "If cleanup old network elements")
private Boolean cleanup;
private Boolean cleanup = false;
@Parameter(name = ApiConstants.MAKEREDUNDANTE, type = CommandType.BOOLEAN, required = false, description = "Turn a single VPC into a redundant one.")
private Boolean makeredundant;
@Parameter(name = ApiConstants.MAKEREDUNDANT, type = CommandType.BOOLEAN, required = false, description = "Turn a single VPC into a redundant one.")
private Boolean makeredundant = false;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
@ -63,18 +63,12 @@ public class RestartVPCCmd extends BaseAsyncCmd {
}
public Boolean getCleanup() {
if (cleanup != null) {
return cleanup;
}
return true;
}
public Boolean getMakeredundant() {
if (makeredundant != null) {
return makeredundant;
}
return true;
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////

View File

@ -229,6 +229,10 @@ public class NetworkResponse extends BaseResponse implements ControlledEntityRes
@Param(description = "The external id of the network", since = "4.11")
private String externalId;
@SerializedName(ApiConstants.REDUNDANT_ROUTER)
@Param(description = "If the network has redundant routers enabled", since = "4.11.1")
private Boolean redundantRouter;
public Boolean getDisplayNetwork() {
return displayNetwork;
}
@ -437,4 +441,12 @@ public class NetworkResponse extends BaseResponse implements ControlledEntityRes
public void setExternalId(String externalId) {
this.externalId = externalId;
}
/**
 * @return the value serialized as the redundant-router field of this response; may be {@code null} if it was never set
 */
public Boolean getRedundantRouter() {
return redundantRouter;
}
/**
 * Sets the value serialized as the redundant-router field of this response.
 * @param redundantRouter whether the network has redundant routers enabled
 */
public void setRedundantRouter(Boolean redundantRouter) {
this.redundantRouter = redundantRouter;
}
}

View File

@ -43,6 +43,7 @@ import com.cloud.network.element.LoadBalancingServiceProvider;
import com.cloud.network.element.StaticNatServiceProvider;
import com.cloud.network.element.UserDataServiceProvider;
import com.cloud.network.guru.NetworkGuru;
import com.cloud.network.router.VirtualRouter;
import com.cloud.network.rules.LoadBalancerContainer.Scheme;
import com.cloud.offering.NetworkOffering;
import com.cloud.user.Account;
@ -65,6 +66,12 @@ public interface NetworkOrchestrationService {
String NetworkThrottlingRateCK = "network.throttling.rate";
String MinVRVersionCK = "minreq.sysvmtemplate.version";
/**
 * The redundant router handover time, in milliseconds, which is defined by the VRRP2 spec as
 * (3 * advertisement interval + skew_seconds), or 10s with the CloudStack defaults.
 */
Long RVRHandoverTime = 10000L;
ConfigKey<String> MinVRVersion = new ConfigKey<String>(String.class, MinVRVersionCK, "Advanced", "4.10.0",
"What version should the Virtual Routers report", true, ConfigKey.Scope.Zone, null);
@ -282,4 +289,21 @@ public interface NetworkOrchestrationService {
void finalizeUpdateInSequence(Network network, boolean success);
List<NetworkGuru> getNetworkGurus();
/**
 * Finds and destroys the routers in the given list that can be destroyed safely,
 * i.e. routers that are in bad states or are backup routers.
 * @param routers list of routers
 * @param context reservation context
 * @throws ResourceUnavailableException
 */
void destroyExpendableRouters(final List<? extends VirtualRouter> routers, final ReservationContext context) throws ResourceUnavailableException;
/**
 * Checks whether all routers in the given list are running.
 * @param routers list of routers
 * @return true if all routers are running
 */
boolean areRoutersRunning(final List<? extends VirtualRouter> routers);
}

View File

@ -38,15 +38,11 @@ import java.util.stream.Collectors;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.utils.StringUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.cloud.entity.api.db.VMNetworkMapVO;
import org.apache.cloudstack.engine.cloud.entity.api.db.dao.VMNetworkMapDao;
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
import org.apache.cloudstack.framework.config.ConfigDepot;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.ConfigKey.Scope;
import org.apache.cloudstack.framework.config.Configurable;
@ -54,8 +50,7 @@ import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.messagebus.MessageBus;
import org.apache.cloudstack.framework.messagebus.PublishScope;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.region.PortableIpDao;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@ -87,7 +82,6 @@ import com.cloud.deploy.DataCenterDeployment;
import com.cloud.deploy.DeployDestination;
import com.cloud.deploy.DeploymentPlan;
import com.cloud.domain.Domain;
import com.cloud.event.dao.UsageEventDao;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.ConnectionException;
import com.cloud.exception.InsufficientAddressCapacityException;
@ -119,6 +113,7 @@ import com.cloud.network.Networks.TrafficType;
import com.cloud.network.PhysicalNetwork;
import com.cloud.network.PhysicalNetworkSetupInfo;
import com.cloud.network.RemoteAccessVpn;
import com.cloud.network.VpcVirtualNetworkApplianceService;
import com.cloud.network.addr.PublicIp;
import com.cloud.network.dao.AccountGuestVlanMapDao;
import com.cloud.network.dao.AccountGuestVlanMapVO;
@ -128,7 +123,6 @@ import com.cloud.network.dao.IPAddressVO;
import com.cloud.network.dao.NetworkAccountDao;
import com.cloud.network.dao.NetworkAccountVO;
import com.cloud.network.dao.NetworkDao;
import com.cloud.network.dao.NetworkDetailsDao;
import com.cloud.network.dao.NetworkDomainDao;
import com.cloud.network.dao.NetworkDomainVO;
import com.cloud.network.dao.NetworkServiceMapDao;
@ -141,7 +135,6 @@ import com.cloud.network.dao.PhysicalNetworkTrafficTypeVO;
import com.cloud.network.dao.PhysicalNetworkVO;
import com.cloud.network.dao.RemoteAccessVpnDao;
import com.cloud.network.dao.RemoteAccessVpnVO;
import com.cloud.network.dao.VpnUserDao;
import com.cloud.network.element.AggregatedCommandExecutor;
import com.cloud.network.element.DhcpServiceProvider;
import com.cloud.network.element.DnsServiceProvider;
@ -183,6 +176,7 @@ import com.cloud.user.User;
import com.cloud.user.dao.AccountDao;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.Pair;
import com.cloud.utils.StringUtils;
import com.cloud.utils.UuidUtils;
import com.cloud.utils.component.AdapterBase;
import com.cloud.utils.component.ManagerBase;
@ -201,9 +195,9 @@ import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.fsm.NoTransitionException;
import com.cloud.utils.fsm.StateMachine2;
import com.cloud.utils.net.Dhcp;
import com.cloud.utils.net.NetUtils;
import com.cloud.vm.DomainRouterVO;
import com.cloud.utils.net.Dhcp;
import com.cloud.vm.Nic;
import com.cloud.vm.Nic.ReservationStrategy;
import com.cloud.vm.NicExtraDhcpOptionVO;
@ -287,11 +281,11 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
@Inject
VMNetworkMapDao _vmNetworkMapDao;
@Inject
DomainRouterDao _rotuerDao;
DomainRouterDao _routerDao;
@Inject
RemoteAccessVpnDao _remoteAccessVpnDao;
@Inject
VpnUserDao _vpnUserDao;
VpcVirtualNetworkApplianceService _routerService;
List<NetworkGuru> networkGurus;
@ -369,17 +363,9 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
@Inject
NetworkACLManager _networkACLMgr;
@Inject
UsageEventDao _usageEventDao;
@Inject
NetworkModel _networkModel;
@Inject
NicSecondaryIpDao _nicSecondaryIpDao;
@Inject
PortableIpDao _portableIpDao;
@Inject
ConfigDepot _configDepot;
@Inject
NetworkDetailsDao _networkDetailsDao;
protected StateMachine2<Network.State, Network.Event, Network> _stateMachine;
ScheduledExecutorService _executor;
@ -1147,29 +1133,7 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
}
// get providers to implement
final List<Provider> providersToImplement = getNetworkProviders(network.getId());
for (final NetworkElement element : networkElements) {
if (providersToImplement.contains(element.getProvider())) {
if (!_networkModel.isProviderEnabledInPhysicalNetwork(_networkModel.getPhysicalNetworkId(network), element.getProvider().getName())) {
// The physicalNetworkId will not get translated into a uuid by the reponse serializer,
// because the serializer would look up the NetworkVO class's table and retrieve the
// network id instead of the physical network id.
// So just throw this exception as is. We may need to TBD by changing the serializer.
throw new CloudRuntimeException("Service provider " + element.getProvider().getName() + " either doesn't exist or is not enabled in physical network id: "
+ network.getPhysicalNetworkId());
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Asking " + element.getName() + " to implement " + network);
}
if (!element.implement(network, offering, dest, context)) {
final CloudRuntimeException ex = new CloudRuntimeException("Failed to implement provider " + element.getProvider().getName() + " for network with specified id");
ex.addProxyObject(network.getUuid(), "networkId");
throw ex;
}
}
}
implementNetworkElements(dest, context, network, offering, providersToImplement);
//Reset the extra DHCP option that may have been cleared per nic.
List<NicVO> nicVOs = _nicDao.listByNetworkId(network.getId());
@ -1217,6 +1181,32 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
}
}
/**
 * Asks every network element whose provider is listed in {@code providersToImplement}
 * to implement the given network.
 *
 * @param dest deployment destination handed to each element
 * @param context reservation context handed to each element
 * @param network network to be implemented
 * @param offering network offering handed to each element
 * @param providersToImplement providers whose elements must implement the network
 * @throws CloudRuntimeException if a provider is not enabled in the physical network,
 *         or if an element reports that it failed to implement the network
 * @throws ConcurrentOperationException
 * @throws ResourceUnavailableException
 * @throws InsufficientCapacityException
 */
private void implementNetworkElements(final DeployDestination dest, final ReservationContext context, final Network network, final NetworkOffering offering, final List<Provider> providersToImplement)
        throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
    for (final NetworkElement element : networkElements) {
        if (!providersToImplement.contains(element.getProvider())) {
            // This element's provider is not part of the network; nothing to implement.
            continue;
        }
        if (!_networkModel.isProviderEnabledInPhysicalNetwork(_networkModel.getPhysicalNetworkId(network), element.getProvider().getName())) {
            // The physicalNetworkId will not get translated into a uuid by the response serializer,
            // because the serializer would look up the NetworkVO class's table and retrieve the
            // network id instead of the physical network id.
            // So just throw this exception as is. We may need to TBD by changing the serializer.
            throw new CloudRuntimeException("Service provider " + element.getProvider().getName() + " either doesn't exist or is not enabled in physical network id: "
                    + network.getPhysicalNetworkId());
        }

        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Asking " + element.getName() + " to implement " + network);
        }

        if (!element.implement(network, offering, dest, context)) {
            final CloudRuntimeException ex = new CloudRuntimeException("Failed to implement provider " + element.getProvider().getName() + " for network with specified id");
            // Attach the network uuid so the API layer can report which network failed.
            ex.addProxyObject(network.getUuid(), "networkId");
            throw ex;
        }
    }
}
// This method re-programs the rules/ips for existing network
protected boolean reprogramNetworkRules(final long networkId, final Account caller, final Network network) throws ResourceUnavailableException {
boolean success = true;
@ -1235,7 +1225,6 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
success = false;
}
// associate all ip addresses
if (!_ipAddrMgr.applyIpAssociations(network, false)) {
s_logger.warn("Failed to apply ip addresses as a part of network id" + networkId + " restart");
@ -1349,7 +1338,7 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
throw new UnsupportedOperationException("Cannot update the network resources in sequence when providers other than virtualrouter are used");
}
//check if routers are in correct state before proceeding with the update
List<DomainRouterVO> routers=_rotuerDao.listByNetworkAndRole(network.getId(), VirtualRouter.Role.VIRTUAL_ROUTER);
List<DomainRouterVO> routers = _routerDao.listByNetworkAndRole(network.getId(), VirtualRouter.Role.VIRTUAL_ROUTER);
for (DomainRouterVO router : routers){
if (router.getRedundantState() == VirtualRouter.RedundantState.UNKNOWN) {
if (!forced) {
@ -2855,26 +2844,18 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
s_logger.debug("Restarting network " + networkId + "...");
final ReservationContext context = new ReservationContextImpl(null, null, callerUser, callerAccount);
final NetworkOffering offering = _networkOfferingDao.findByIdIncludingRemoved(network.getNetworkOfferingId());
final DeployDestination dest = new DeployDestination(_dcDao.findById(network.getDataCenterId()), null, null, null);
if (cleanup) {
// shutdown the network
s_logger.debug("Shutting down the network id=" + networkId + " as a part of network restart");
if (!shutdownNetworkElementsAndResources(context, true, network)) {
s_logger.debug("Failed to shutdown the network elements and resources as a part of network restart: " + network.getState());
if (!rollingRestartRouters(network, offering, dest, context)) {
setRestartRequired(network, true);
return false;
}
} else {
s_logger.debug("Skip the shutting down of network id=" + networkId);
return true;
}
// implement the network elements and rules again
final DeployDestination dest = new DeployDestination(_dcDao.findById(network.getDataCenterId()), null, null, null);
s_logger.debug("Implementing the network " + network + " elements and resources as a part of network restart");
final NetworkOfferingVO offering = _networkOfferingDao.findById(network.getNetworkOfferingId());
s_logger.debug("Implementing the network " + network + " elements and resources as a part of network restart without cleanup");
try {
implementNetworkElementsAndResources(dest, context, network, offering);
setRestartRequired(network, true);
@ -2885,6 +2866,103 @@ public class NetworkOrchestrator extends ManagerBase implements NetworkOrchestra
}
}
/**
 * Destroys the routers of the given list that are safe to remove.
 * <p>
 * First, every router in the Stopped, Error, Shutdowned or Unknown state is destroyed.
 * Then, if at least two routers remain, one more is destroyed: a router whose redundant
 * state is BACKUP if there is one, otherwise the last of the remaining routers.
 *
 * @param routers list of routers to inspect
 * @param context reservation context providing the account and caller used for the destroy calls
 * @throws ResourceUnavailableException
 */
@Override
public void destroyExpendableRouters(final List<? extends VirtualRouter> routers, final ReservationContext context) throws ResourceUnavailableException {
    final List<VirtualRouter> remainingRouters = new ArrayList<>();
    for (final VirtualRouter router : routers) {
        if (router.getState() == VirtualMachine.State.Stopped ||
                router.getState() == VirtualMachine.State.Error ||
                router.getState() == VirtualMachine.State.Shutdowned ||
                router.getState() == VirtualMachine.State.Unknown) {
            s_logger.debug("Destroying old router " + router);
            _routerService.destroyRouter(router.getId(), context.getAccount(), context.getCaller().getId());
        } else {
            remainingRouters.add(router);
        }
    }

    // With fewer than two routers left there is nothing expendable any more.
    if (remainingRouters.size() < 2) {
        return;
    }

    VirtualRouter backupRouter = null;
    for (final VirtualRouter router : remainingRouters) {
        if (router.getRedundantState() == VirtualRouter.RedundantState.BACKUP) {
            backupRouter = router;
        }
    }

    if (backupRouter == null) {
        // No router reports BACKUP: fall back to the last router that is still alive.
        // The choice must be made among remainingRouters; the tail of the original list
        // may be a router that was already destroyed in the first pass above.
        backupRouter = remainingRouters.get(remainingRouters.size() - 1);
    }

    _routerService.destroyRouter(backupRouter.getId(), context.getAccount(), context.getCaller().getId());
}
/**
 * Tells whether every router of the given list is in the Running state.
 * The first router found in any other state is reported in the debug log.
 *
 * @param routers list of routers to check
 * @return {@code true} when all routers are running (also for an empty list), {@code false} otherwise
 */
@Override
public boolean areRoutersRunning(final List<? extends VirtualRouter> routers) {
    for (final VirtualRouter candidate : routers) {
        final boolean running = candidate.getState() == VirtualMachine.State.Running;
        if (running) {
            continue;
        }
        s_logger.debug("Found new router " + candidate.getInstanceName() + " to be in non-Running state: " + candidate.getState() + ". Please try restarting network again.");
        return false;
    }
    return true;
}
/**
 * rollingRestartRouters performs restart of routers of a network by first
 * deploying a new VR and then destroying old VRs in rolling fashion. For
 * non-redundant network, it will re-program the new router as final step
 * otherwise deploys a backup router for the network.
 * @param network network to be restarted
 * @param offering network offering
 * @param dest deployment destination
 * @param context reservation context
 * @return returns true when all routers of the network are running after the rolling restart
 * @throws ResourceUnavailableException
 * @throws ConcurrentOperationException
 * @throws InsufficientCapacityException
 */
private boolean rollingRestartRouters(final NetworkVO network, final NetworkOffering offering, final DeployDestination dest, final ReservationContext context) throws ResourceUnavailableException, ConcurrentOperationException, InsufficientCapacityException {
    s_logger.debug("Performing rolling restart of routers of network " + network);
    // Get rid of broken and backup routers first, so that at most one old router keeps serving.
    destroyExpendableRouters(_routerDao.findByNetwork(network.getId()), context);

    final List<Provider> providersToImplement = getNetworkProviders(network.getId());
    final List<DomainRouterVO> oldRouters = _routerDao.findByNetwork(network.getId());
    final boolean hasOldRouters = !oldRouters.isEmpty();

    // Deploy a new router. The rolling-restart flag is raised only while an old router
    // still exists, and is always lowered again, even when the deployment throws.
    if (hasOldRouters) {
        network.setRollingRestart(true);
    }
    try {
        implementNetworkElements(dest, context, network, offering, providersToImplement);
    } finally {
        if (hasOldRouters) {
            network.setRollingRestart(false);
        }
    }

    // For redundant network wait for 3*advert_int+skew_seconds for VRRP to kick in
    if (network.isRedundant() || (oldRouters.size() == 1 && oldRouters.get(0).getIsRedundantRouter())) {
        try {
            Thread.sleep(NetworkOrchestrationService.RVRHandoverTime);
        } catch (final InterruptedException e) {
            // Cut the wait short but keep the interrupt visible to the caller
            // instead of silently swallowing it.
            Thread.currentThread().interrupt();
        }
    }

    // Destroy old routers
    for (final DomainRouterVO oldRouter : oldRouters) {
        _routerService.destroyRouter(oldRouter.getId(), context.getAccount(), context.getCaller().getId());
    }

    if (network.isRedundant()) {
        // Add a new backup router for redundant network
        implementNetworkElements(dest, context, network, offering, providersToImplement);
    } else {
        // Re-apply rules for non-redundant network
        implementNetworkElementsAndResources(dest, context, network, offering);
    }

    return areRoutersRunning(_routerDao.findByNetwork(network.getId()));
}
private void setRestartRequired(final NetworkVO network, final boolean restartRequired) {
s_logger.debug("Marking network " + network + " with restartRequired=" + restartRequired);
network.setRestartRequired(restartRequired);

View File

@ -116,6 +116,7 @@ public class NetworkDaoImpl extends GenericDaoBase<NetworkVO, Long>implements Ne
AllFieldsSearch.and("broadcastUri", AllFieldsSearch.entity().getBroadcastUri(), Op.EQ);
AllFieldsSearch.and("vpcId", AllFieldsSearch.entity().getVpcId(), Op.EQ);
AllFieldsSearch.and("aclId", AllFieldsSearch.entity().getNetworkACLId(), Op.EQ);
AllFieldsSearch.and("redundant", AllFieldsSearch.entity().isRedundant(), Op.EQ);
final SearchBuilder<NetworkOfferingVO> join1 = _ntwkOffDao.createSearchBuilder();
join1.and("isSystem", join1.entity().isSystemOnly(), Op.EQ);
join1.and("isRedundant", join1.entity().getRedundantRouter(), Op.EQ);
@ -656,7 +657,7 @@ public class NetworkDaoImpl extends GenericDaoBase<NetworkVO, Long>implements Ne
@Override
public List<NetworkVO> listRedundantNetworks() {
final SearchCriteria<NetworkVO> sc = AllFieldsSearch.create();
sc.setJoinParameters("offerings", "isRedundant", true);
sc.setParameters("redundant", true);
return listBy(sc, null);
}

View File

@ -178,13 +178,8 @@ public class NetworkVO implements Network {
@Transient
transient String vlanIdAsUUID;
public String getVlanIdAsUUID() {
return vlanIdAsUUID;
}
public void setVlanIdAsUUID(String vlanIdAsUUID) {
this.vlanIdAsUUID = vlanIdAsUUID;
}
@Transient
boolean rollingRestart = false;
public NetworkVO() {
uuid = UUID.randomUUID().toString();
@ -650,4 +645,20 @@ public class NetworkVO implements Network {
public void setExternalId(String externalId) {
this.externalId = externalId;
}
/**
 * @return the value of the transient (non-persisted) {@code vlanIdAsUUID} field
 */
public String getVlanIdAsUUID() {
return vlanIdAsUUID;
}
/**
 * Sets the transient (non-persisted) {@code vlanIdAsUUID} field.
 * @param vlanIdAsUUID new value of the field
 */
public void setVlanIdAsUUID(String vlanIdAsUUID) {
this.vlanIdAsUUID = vlanIdAsUUID;
}
/**
 * @return the transient (non-persisted) rolling-restart flag of this network
 */
@Override
public boolean isRollingRestart() {
    return rollingRestart;
}
/**
 * Sets the transient (non-persisted) rolling-restart flag of this network.
 * @param rollingRestart new value of the flag
 */
public void setRollingRestart(boolean rollingRestart) {
this.rollingRestart = rollingRestart;
}
}

View File

@ -25,6 +25,7 @@ import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Transient;
import com.cloud.utils.db.GenericDao;
@ -88,6 +89,9 @@ public class VpcVO implements Vpc {
@Column(name = "region_level_vpc")
boolean regionLevelVpc = false;
@Transient
boolean rollingRestart = false;
public VpcVO() {
uuid = UUID.randomUUID().toString();
}
@ -228,6 +232,15 @@ public class VpcVO implements Vpc {
redundant = isRedundant;
}
/**
 * @return the transient (non-persisted) rolling-restart flag of this VPC
 */
@Override
public boolean isRollingRestart() {
return rollingRestart;
}
/**
 * Sets the transient (non-persisted) rolling-restart flag of this VPC.
 * @param rollingRestart new value of the flag
 */
@Override
public void setRollingRestart(boolean rollingRestart) {
    this.rollingRestart = rollingRestart;
}
@Override
public Class<?> getEntityType() {
return Vpc.class;

View File

@ -2241,6 +2241,7 @@ public class ApiResponseHelper implements ResponseGenerator {
response.setNetworkSpannedZones(networkSpannedZones);
}
response.setExternalId(network.getExternalId());
response.setRedundantRouter(network.isRedundant());
response.setObjectName("network");
return response;
}

View File

@ -76,7 +76,9 @@ public class DomainRouterJoinDaoImpl extends GenericDaoBase<DomainRouterJoinVO,
routerResponse.setCreated(router.getCreated());
routerResponse.setState(router.getState());
routerResponse.setIsRedundantRouter(router.isRedundantRouter());
if (router.getRedundantState() != null) {
routerResponse.setRedundantState(router.getRedundantState().toString());
}
if (router.getTemplateVersion() != null) {
String routerVersion = Version.trimRouterVersion(router.getTemplateVersion());
routerResponse.setVersion(routerVersion);

View File

@ -1842,7 +1842,7 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
@Override
@ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true)
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
// This method restarts all network elements belonging to the network and re-applies all the rules
Long networkId = cmd.getNetworkId();
@ -1872,6 +1872,14 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
_accountMgr.checkAccess(callerAccount, null, true, network);
if (!network.isRedundant() && makeRedundant) {
network.setRedundant(true);
if (!_networksDao.update(network.getId(), network)) {
throw new CloudRuntimeException("Failed to update network into a redundant one, please try again");
}
cleanup = true;
}
boolean success = _networkMgr.restartNetwork(networkId, callerAccount, callerUser, cleanup);
if (success) {

View File

@ -226,11 +226,11 @@ NetworkMigrationResponder, AggregatedCommandExecutor, RedundantResource, DnsServ
final List<DomainRouterVO> routers = routerDeploymentDefinition.deployVirtualRouter();
int routerCounts = 1;
if (offering.getRedundantRouter()) {
routerCounts = 2;
int expectedRouters = 1;
if (offering.getRedundantRouter() || network.isRollingRestart()) {
expectedRouters = 2;
}
if (routers == null || routers.size() < routerCounts) {
if (routers == null || routers.size() < expectedRouters) {
//we might have a router which is already deployed and running.
//so check the no of routers in network currently.
List<DomainRouterVO> current_routers = _routerDao.listByNetworkAndRole(network.getId(), Role.VIRTUAL_ROUTER);

View File

@ -33,13 +33,11 @@ import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.annotation.PostConstruct;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd;
import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd;
@ -47,6 +45,9 @@ import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import com.cloud.configuration.Config;
import com.cloud.configuration.Resource.ResourceType;
import com.cloud.dc.DataCenter;
@ -88,6 +89,7 @@ import com.cloud.network.dao.NetworkVO;
import com.cloud.network.element.NetworkElement;
import com.cloud.network.element.StaticNatServiceProvider;
import com.cloud.network.element.VpcProvider;
import com.cloud.network.router.VpcVirtualNetworkApplianceManager;
import com.cloud.network.vpc.VpcOffering.State;
import com.cloud.network.vpc.dao.NetworkACLDao;
import com.cloud.network.vpc.dao.PrivateIpDao;
@ -132,8 +134,10 @@ import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.exception.ExceptionUtil;
import com.cloud.utils.net.NetUtils;
import com.cloud.vm.DomainRouterVO;
import com.cloud.vm.ReservationContext;
import com.cloud.vm.ReservationContextImpl;
import com.cloud.vm.dao.DomainRouterDao;
public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvisioningService, VpcService {
private static final Logger s_logger = Logger.getLogger(VpcManagerImpl.class);
@ -196,6 +200,10 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
NetworkACLManager _networkAclMgr;
@Inject
IpAddressManager _ipAddrMgr;
@Inject
VpcVirtualNetworkApplianceManager _routerService;
@Inject
DomainRouterDao _routerDao;
@Inject
private VpcPrivateGatewayTransactionCallable vpcTxCallable;
@ -1175,7 +1183,7 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
for (final VpcProvider element : getVpcElements()) {
if (providersToImplement.contains(element.getProvider())) {
if (element.implementVpc(vpc, dest, context)) {
s_logger.debug("Vpc " + vpc + " has started succesfully");
s_logger.debug("Vpc " + vpc + " has started successfully");
} else {
s_logger.warn("Vpc " + vpc + " failed to start");
success = false;
@ -1482,33 +1490,36 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
public boolean restartVpc(final long vpcId, final boolean cleanUp, final boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
final Account caller = CallContext.current().getCallingAccount();
final Account callerAccount = CallContext.current().getCallingAccount();
final User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId());
final ReservationContext context = new ReservationContextImpl(null, null, callerUser, callerAccount);
// Verify input parameters
final Vpc vpc = getActiveVpc(vpcId);
Vpc vpc = getActiveVpc(vpcId);
if (vpc == null) {
final InvalidParameterValueException ex = new InvalidParameterValueException("Unable to find Enabled VPC by id specified");
ex.addProxyObject(String.valueOf(vpcId), "VPC");
throw ex;
}
_accountMgr.checkAccess(caller, null, false, vpc);
_accountMgr.checkAccess(callerAccount, null, false, vpc);
s_logger.debug("Restarting VPC " + vpc);
boolean restartRequired = false;
try {
boolean forceCleanup = cleanUp;
if (!vpc.isRedundant() && makeRedundant) {
final VpcOfferingVO redundantOffering = _vpcOffDao.findByUniqueName(VpcOffering.redundantVPCOfferingName);
final VpcVO entity = _vpcDao.findById(vpcId);
entity.setRedundant(makeRedundant);
entity.setRedundant(true);
entity.setVpcOfferingId(redundantOffering.getId());
// Change the VPC in order to get it updated after the end of
// the restart procedure.
_vpcDao.update(vpc.getId(), entity);
if (_vpcDao.update(vpc.getId(), entity)) {
vpc = entity;
}
// If the offering and redundant column are changing, force the
// clean up.
@ -1516,17 +1527,15 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
}
if (forceCleanup) {
s_logger.debug("Shutting down VPC " + vpc + " as a part of VPC restart process");
if (!shutdownVpc(vpcId)) {
s_logger.warn("Failed to shutdown vpc as a part of VPC " + vpc + " restart process");
if (!rollingRestartVpc(vpc, context)) {
s_logger.warn("Failed to execute a rolling restart as a part of VPC " + vpc + " restart process");
restartRequired = true;
return false;
}
} else {
s_logger.info("Will not shutdown vpc as a part of VPC " + vpc + " restart process.");
return true;
}
s_logger.debug("Starting VPC " + vpc + " as a part of VPC restart process");
s_logger.debug("Starting VPC " + vpc + " as a part of VPC restart process without cleanup");
if (!startVpc(vpcId, false)) {
s_logger.warn("Failed to start vpc as a part of VPC " + vpc + " restart process");
restartRequired = true;
@ -2435,4 +2444,55 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
final Map<Network.Service, Set<Network.Provider>> vpcOffSvcProvidersMap = getVpcOffSvcProvidersMap(vpcOfferingId);
return vpcOffSvcProvidersMap.get(Network.Service.SourceNat).contains(Network.Provider.VPCVirtualRouter);
}
/**
 * Performs a rolling restart of the routers of a VPC: a new router is
 * deployed first and only then are the old routers destroyed, so that the
 * VPC is not left without a router while the restart is in progress. For a
 * non-redundant VPC the final step re-programs the new router; for a
 * redundant VPC it deploys the backup router.
 *
 * @param vpc vpc to be restarted
 * @param context reservation context
 * @return true when the final start of the VPC succeeds and
 *         {@code areRoutersRunning} reports true for the routers then
 *         listed for the VPC; false otherwise
 * @throws ResourceUnavailableException
 * @throws ConcurrentOperationException
 * @throws InsufficientCapacityException
 */
private boolean rollingRestartVpc(final Vpc vpc, final ReservationContext context) throws ResourceUnavailableException, ConcurrentOperationException, InsufficientCapacityException {
    s_logger.debug("Performing rolling restart of routers of VPC " + vpc);
    _ntwkMgr.destroyExpendableRouters(_routerDao.listByVpcId(vpc.getId()), context);

    final DeployDestination dest = new DeployDestination(_dcDao.findById(vpc.getZoneId()), null, null, null);
    // Routers that survived the expendable-router cleanup; these are replaced below.
    final List<DomainRouterVO> oldRouters = _routerDao.listByVpcId(vpc.getId());
    final boolean hasOldRouters = !oldRouters.isEmpty();

    // Create a new router. The rolling restart flag is only raised while
    // this first start runs, and is always lowered again, even when
    // startVpc throws, so the VPC object is never left flagged.
    if (hasOldRouters) {
        vpc.setRollingRestart(true);
    }
    try {
        // NOTE(review): the result of this start is not checked, so the old
        // routers are destroyed below even if no new router came up - confirm
        // whether that is intended.
        startVpc(vpc, dest, context);
    } finally {
        if (hasOldRouters) {
            vpc.setRollingRestart(false);
        }
    }

    // For redundant vpc wait for 3*advert_int+skew_seconds for VRRP to kick in
    if (vpc.isRedundant() || (oldRouters.size() == 1 && oldRouters.get(0).getIsRedundantRouter())) {
        try {
            Thread.sleep(NetworkOrchestrationService.RVRHandoverTime);
        } catch (final InterruptedException e) {
            // Keep going with the restart, but preserve the interrupt status
            // so that callers further up the stack can still observe it.
            Thread.currentThread().interrupt();
        }
    }

    // Destroy old routers
    for (final DomainRouterVO oldRouter : oldRouters) {
        _routerService.destroyRouter(oldRouter.getId(), context.getAccount(), context.getCaller().getId());
    }

    // Re-program VPC VR or add a new backup router for redundant VPC
    if (!startVpc(vpc, dest, context)) {
        s_logger.debug("Failed to re-program VPC router or deploy a new backup router for VPC" + vpc);
        return false;
    }

    return _ntwkMgr.areRoutersRunning(_routerDao.listByVpcId(vpc.getId()));
}
}

View File

@ -148,6 +148,10 @@ public class RouterDeploymentDefinition {
return guestNetwork.isRedundant();
}
/**
 * Tells whether the guest network of this deployment is currently going
 * through a rolling restart.
 *
 * @return the rolling restart flag reported by the guest network
 */
public boolean isRollingRestart() {
    final boolean rollingRestart = guestNetwork.isRollingRestart();
    return rollingRestart;
}
/**
 * @return the deployment plan held by this definition
 */
public DeploymentPlan getPlan() {
    final DeploymentPlan currentPlan = plan;
    return currentPlan;
}
@ -316,7 +320,7 @@ public class RouterDeploymentDefinition {
// If old network is redundant but new is single router, then
// routers.size() = 2 but routerCount = 1
int routersExpected = 1;
if (isRedundant()) {
if (isRedundant() || isRollingRestart()) {
routersExpected = 2;
}
return routersExpected < routers.size() ? 0 : routersExpected - routers.size();

View File

@ -194,4 +194,9 @@ public class VpcRouterDeploymentDefinition extends RouterDeploymentDefinition {
public boolean isRedundant() {
return vpc.isRedundant();
}
/**
 * Tells whether the VPC of this deployment is currently going through a
 * rolling restart.
 *
 * @return the rolling restart flag reported by the VPC
 */
@Override
public boolean isRollingRestart() {
    final boolean rollingRestart = vpc.isRollingRestart();
    return rollingRestart;
}
}

View File

@ -68,6 +68,7 @@ import com.cloud.network.element.NetworkElement;
import com.cloud.network.element.StaticNatServiceProvider;
import com.cloud.network.element.UserDataServiceProvider;
import com.cloud.network.guru.NetworkGuru;
import com.cloud.network.router.VirtualRouter;
import com.cloud.network.rules.LoadBalancerContainer.Scheme;
import com.cloud.network.vpc.Vpc;
import com.cloud.offering.NetworkOffering;
@ -215,7 +216,7 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches
* @see com.cloud.network.NetworkService#restartNetwork(com.cloud.api.commands.RestartNetworkCmd, boolean)
*/
@Override
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup) throws ConcurrentOperationException, ResourceUnavailableException,
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
// TODO Auto-generated method stub
return false;
@ -914,6 +915,15 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches
return null;
}
/**
 * Mock implementation: intentionally does nothing with the given routers.
 *
 * @param routers routers that would be considered for destruction (ignored)
 * @param context reservation context (ignored)
 */
@Override
public void destroyExpendableRouters(final List<? extends VirtualRouter> routers, final ReservationContext context) throws ResourceUnavailableException {
}
/**
 * Mock implementation: does not inspect the given routers.
 *
 * @param routers routers to check (ignored)
 * @return always false
 */
@Override
public boolean areRoutersRunning(final List<? extends VirtualRouter> routers) {
return false;
}
@Override
public void finalizeUpdateInSequence(Network network, boolean success) {
return;

View File

@ -628,7 +628,9 @@ class CsIP:
def arpPing(self):
cmd = "arping -c 1 -I %s -A -U -s %s %s" % (
self.dev, self.address['public_ip'], self.address['gateway'])
CsHelper.execute(cmd)
if not self.cl.is_redundant() and (not self.address['gateway'] or self.address['gateway'] == "None"):
cmd = "arping -c 1 -I %s -A -U %s" % (self.dev, self.address['public_ip'])
CsHelper.execute2(cmd, False)
# Delete any ips that are configured but not in the bag
def compare(self, bag):

View File

@ -333,13 +333,13 @@ setup_common() {
fi
# Workaround to activate vSwitch under VMware
timeout 3 ping -n -c 3 $GW || true
timeout 3 ping -n -c 3 $GW &
if [ -n "$MGMTNET" -a -n "$LOCAL_GW" ]
then
timeout 3 ping -n -c 3 $LOCAL_GW || true
timeout 3 ping -n -c 3 $LOCAL_GW &
#This code is added to address ARP issue by pinging MGMT_GW
MGMT_GW=$(echo $MGMTNET | awk -F "." '{print $1"."$2"."$3".1"}')
timeout 3 ping -n -c 3 $MGMT_GW || true
timeout 3 ping -n -c 3 $MGMT_GW &
fi
if [ "$HYPERVISOR" == "vmware" ]; then

View File

@ -65,9 +65,6 @@ do
systemctl disable --no-block --now $svc
done
# Enable SSH by default
systemctl enable --no-block --now ssh
# Restore the persistent iptables nat, rules and filters for IPv4 and IPv6 if they exist
ipv4="/etc/iptables/rules.v4"
if [ -e $ipv4 ]
@ -81,5 +78,8 @@ then
ip6tables-restore < $ipv6
fi
# Enable SSH by default
systemctl enable --no-block --now ssh
date > /var/cache/cloud/boot_up_done
logger -t cloud "Boot up process done"

View File

@ -31,9 +31,6 @@ vrrp_instance inside_network {
nopreempt
advert_int 1
garp_master_delay 1
garp_master_repeat 10
garp_master_refresh 5
#use_vmac
#vmac_xmit_base

View File

@ -1100,11 +1100,23 @@
});
args.$form.find('.form-item[rel=cleanup]').find('input').attr('checked', 'checked'); //checked
args.$form.find('.form-item[rel=cleanup]').css('display', 'inline-block'); //shown
args.$form.find('.form-item[rel=makeredundant]').find('input').attr('checked', 'checked'); //checked
args.$form.find('.form-item[rel=makeredundant]').css('display', 'inline-block'); //shown
if (Boolean(args.context.networks[0].redundantrouter)) {
args.$form.find('.form-item[rel=makeredundant]').hide();
} else {
args.$form.find('.form-item[rel=makeredundant]').show();
}
},
fields: {
cleanup: {
label: 'label.clean.up',
isBoolean: true
},
makeredundant: {
label: 'label.make.redundant',
isBoolean: true
}
}
},
@ -1114,10 +1126,13 @@
}
},
action: function(args) {
var array1 = [];
array1.push("&cleanup=" + (args.data.cleanup == "on"));
$.ajax({
url: createURL("restartNetwork&id=" + args.context.networks[0].id + array1.join("")),
url: createURL("restartNetwork"),
data: {
id: args.context.networks[0].id,
cleanup: (args.data.cleanup == "on"),
makeredundant: (args.data.makeredundant == "on")
},
dataType: "json",
async: true,
success: function(json) {
@ -1416,10 +1431,20 @@
label: 'label.reserved.ip.range'
},
redundantrouter: {
label: 'label.redundant.router',
converter: function(booleanValue) {
if (booleanValue == true) {
return "Yes";
}
return "No";
}
},
networkdomaintext: {
label: 'label.network.domain.text'
},
networkdomain: {
label: 'label.network.domain',
isEditable: true