Health check feature for virtual router (#3575)

This commit is contained in:
Anurag Awasthi 2020-01-30 17:09:03 +05:30 committed by GitHub
parent f1149bcb09
commit c0abfce8fa
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
78 changed files with 3404 additions and 452 deletions

View File

@ -19,6 +19,13 @@ package com.cloud.event;
import java.util.HashMap;
import java.util.Map;
import org.apache.cloudstack.acl.Role;
import org.apache.cloudstack.acl.RolePermission;
import org.apache.cloudstack.annotation.Annotation;
import org.apache.cloudstack.config.Configuration;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.usage.Usage;
import com.cloud.dc.DataCenter;
import com.cloud.dc.Pod;
import com.cloud.dc.StorageNetworkIpRange;
@ -69,12 +76,6 @@ import com.cloud.user.User;
import com.cloud.vm.Nic;
import com.cloud.vm.NicSecondaryIp;
import com.cloud.vm.VirtualMachine;
import org.apache.cloudstack.acl.Role;
import org.apache.cloudstack.acl.RolePermission;
import org.apache.cloudstack.annotation.Annotation;
import org.apache.cloudstack.config.Configuration;
import org.apache.cloudstack.ha.HAConfig;
import org.apache.cloudstack.usage.Usage;
public class EventTypes {
@ -106,6 +107,7 @@ public class EventTypes {
public static final String EVENT_ROUTER_HA = "ROUTER.HA";
public static final String EVENT_ROUTER_UPGRADE = "ROUTER.UPGRADE";
public static final String EVENT_ROUTER_DIAGNOSTICS = "ROUTER.DIAGNOSTICS";
public static final String EVENT_ROUTER_HEALTH_CHECKS = "ROUTER.HEALTH.CHECKS";
// Console proxy
public static final String EVENT_PROXY_CREATE = "PROXY.CREATE";
@ -603,6 +605,7 @@ public class EventTypes {
entityEventDetails.put(EVENT_ROUTER_HA, VirtualRouter.class);
entityEventDetails.put(EVENT_ROUTER_UPGRADE, VirtualRouter.class);
entityEventDetails.put(EVENT_ROUTER_DIAGNOSTICS, VirtualRouter.class);
entityEventDetails.put(EVENT_ROUTER_HEALTH_CHECKS, VirtualRouter.class);
entityEventDetails.put(EVENT_PROXY_CREATE, VirtualMachine.class);
entityEventDetails.put(EVENT_PROXY_DESTROY, VirtualMachine.class);

View File

@ -72,7 +72,9 @@ public interface NetworkService {
boolean deleteNetwork(long networkId, boolean forced);
boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
int getActiveNicsInNetwork(long networkId);

View File

@ -0,0 +1,34 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.network;
import java.util.Date;
/**
* Read-only view of a single health check result associated with a router.
* Each accessor exposes one attribute of the result; the interface itself
* declares no behaviour beyond these getters.
*/
public interface RouterHealthCheckResult {
/** @return the id of the router this result is associated with */
long getRouterId();
/** @return the name of the health check */
String getCheckName();
/** @return the type of the health check (presumably "basic" or "advanced" - confirm against the implementation) */
String getCheckType();
/** @return the boolean outcome of the check (presumably {@code true} means the check passed - confirm against the implementation) */
boolean getCheckResult();
/** @return the time this result was last updated */
Date getLastUpdateTime();
/** @return the check details in parsed form (NOTE(review): exact format is not visible here - confirm against the implementation) */
String getParsedCheckDetails();
}

View File

@ -31,8 +31,7 @@ public interface VirtualNetworkApplianceService {
/**
* Starts domain router
*
* @param cmd
* the command specifying router's id
* @param cmd the command specifying router's id
* @return DomainRouter object
*/
VirtualRouter startRouter(long routerId, boolean reprogramNetwork) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
@ -51,10 +50,8 @@ public interface VirtualNetworkApplianceService {
/**
* Stops domain router
*
* @param id
* of the router
* @param forced
* just do it. caller knows best.
* @param id of the router
* @param forced just do it. caller knows best.
* @return router if successful, null otherwise
* @throws ResourceUnavailableException
* @throws ConcurrentOperationException
@ -68,4 +65,13 @@ public interface VirtualNetworkApplianceService {
VirtualRouter findRouter(long routerId);
List<Long> upgradeRouterTemplate(UpgradeRouterTemplateCmd cmd);
/**
* Updates the router with the latest health check data, runs the health checks and persists
* the health check results on the virtual router if feasible.
* Throws the relevant exception if the feature is disabled or if failures occur.
*
* @param routerId id of the router
* @return NOTE(review): the meaning of the boolean is not documented here - presumably
*         {@code true} when the health checks were performed successfully; confirm against the implementation
*/
boolean performRouterHealthChecks(long routerId);
}

View File

@ -21,6 +21,7 @@ import java.util.Map;
import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd;
import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd;
import org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd;
import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientAddressCapacityException;
@ -29,6 +30,7 @@ import com.cloud.exception.NetworkRuleConflictException;
import com.cloud.exception.ResourceAllocationException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.network.IpAddress;
import com.cloud.user.User;
import com.cloud.utils.Pair;
public interface VpcService {
@ -132,7 +134,9 @@ public interface VpcService {
* @return
* @throws InsufficientCapacityException
*/
boolean restartVpc(long id, boolean cleanUp, boolean makeredundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
boolean restartVpc(RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
boolean restartVpc(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException;
/**
* Returns a Private gateway found in the VPC by id

View File

@ -727,6 +727,7 @@ public class ApiConstants {
public static final String VIRTUAL_SIZE = "virtualsize";
public static final String NETSCALER_CONTROLCENTER_ID = "netscalercontrolcenterid";
public static final String NETSCALER_SERVICEPACKAGE_ID = "netscalerservicepackageid";
public static final String FETCH_ROUTER_HEALTH_CHECK_RESULTS = "fetchhealthcheckresults";
public static final String ZONE_ID_LIST = "zoneids";
public static final String DESTINATION_ZONE_ID_LIST = "destzoneids";
@ -748,6 +749,13 @@ public class ApiConstants {
public static final String FILES = "files";
public static final String VOLUME_IDS = "volumeids";
public static final String ROUTER_ID = "routerid";
public static final String ROUTER_HEALTH_CHECKS = "healthchecks";
public static final String ROUTER_CHECK_NAME = "checkname";
public static final String ROUTER_CHECK_TYPE = "checktype";
public static final String LAST_UPDATED = "lastupdated";
public static final String PERFORM_FRESH_CHECKS = "performfreshchecks";
public enum HostDetails {
all, capacity, events, stats, min;
}

View File

@ -22,6 +22,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.affinity.AffinityGroup;
import org.apache.cloudstack.affinity.AffinityGroupResponse;
@ -146,6 +147,7 @@ import com.cloud.network.PhysicalNetwork;
import com.cloud.network.PhysicalNetworkServiceProvider;
import com.cloud.network.PhysicalNetworkTrafficType;
import com.cloud.network.RemoteAccessVpn;
import com.cloud.network.RouterHealthCheckResult;
import com.cloud.network.Site2SiteCustomerGateway;
import com.cloud.network.Site2SiteVpnConnection;
import com.cloud.network.Site2SiteVpnGateway;
@ -466,4 +468,6 @@ public interface ResponseGenerator {
SSHKeyPairResponse createSSHKeyPairResponse(SSHKeyPair sshkeyPair, boolean privatekey);
ManagementServerResponse createManagementResponse(ManagementServerHost mgmt);
List<RouterHealthCheckResultResponse> createHealthCheckResponse(VirtualMachine router, List<RouterHealthCheckResult> healthCheckResults);
}

View File

@ -16,6 +16,7 @@
// under the License.
package org.apache.cloudstack.api.command.admin.internallb;
import org.apache.commons.lang.BooleanUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.api.APICommand;
@ -73,6 +74,11 @@ public class ListInternalLBVMsCmd extends BaseListProjectAndAccountResourcesCmd
@Parameter(name = ApiConstants.FOR_VPC, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, list only VPC Internal LB VMs")
private Boolean forVpc;
@Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14",
description = "if true is passed for this parameter, also fetch last executed health check results for the VM. Default is false")
private Boolean fetchHealthCheckResults;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
@ -117,6 +123,10 @@ public class ListInternalLBVMsCmd extends BaseListProjectAndAccountResourcesCmd
return Role.INTERNAL_LB_VM.toString();
}
/**
 * Tells whether the caller asked for health check results to be included.
 *
 * @return {@code true} only when the optional request parameter was supplied and is true;
 *         {@code false} when it is absent or false
 */
public boolean shouldFetchHealthCheckResults() {
    final Boolean requested = fetchHealthCheckResults;
    return BooleanUtils.isTrue(requested);
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////

View File

@ -0,0 +1,117 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.command.admin.router;
import java.util.List;
import org.apache.cloudstack.api.APICommand;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.ApiErrorCode;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.ServerApiException;
import org.apache.cloudstack.api.response.DomainRouterResponse;
import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.api.response.RouterHealthCheckResultsListResponse;
import org.apache.cloudstack.context.CallContext;
import org.apache.commons.lang.BooleanUtils;
import org.apache.log4j.Logger;
import com.cloud.exception.InvalidParameterValueException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.network.router.VirtualRouter;
import com.cloud.user.Account;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.vm.VirtualMachine;
/**
 * Admin API command that returns the health check results of a virtual router.
 *
 * <p>Depending on the {@code performfreshchecks} parameter the results are either
 * freshly computed or the last stored ones; that decision is delegated to the query
 * service, which receives this command object.</p>
 */
@APICommand(name = GetRouterHealthCheckResultsCmd.APINAME,
        responseObject = RouterHealthCheckResultsListResponse.class,
        description = "Gets the health check results of a router.",
        entityType = {VirtualMachine.class},
        requestHasSensitiveInfo = false,
        responseHasSensitiveInfo = false,
        since = "4.14.0")
public class GetRouterHealthCheckResultsCmd extends BaseCmd {
    public static final Logger s_logger = Logger.getLogger(GetRouterHealthCheckResultsCmd.class.getName());
    public static final String APINAME = "getRouterHealthCheckResults";

    /////////////////////////////////////////////////////
    //////////////// API parameters /////////////////////
    /////////////////////////////////////////////////////

    @Parameter(name = ApiConstants.ROUTER_ID, type = CommandType.UUID, entityType = DomainRouterResponse.class,
            required = true, description = "the ID of the router")
    private Long routerId;

    @Parameter(name = ApiConstants.PERFORM_FRESH_CHECKS, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, " +
            "health checks are performed on the fly. Else last performed checks data is fetched")
    private Boolean performFreshChecks;

    /////////////////////////////////////////////////////
    /////////////////// Accessors ///////////////////////
    /////////////////////////////////////////////////////

    /**
     * @return the internal id of the router whose health checks are requested
     */
    public Long getRouterId() {
        return routerId;
    }

    /**
     * @return {@code true} only when the optional parameter was supplied and is true
     */
    public boolean shouldPerformFreshChecks() {
        return BooleanUtils.isTrue(performFreshChecks);
    }

    /////////////////////////////////////////////////////
    /////////////// API Implementation///////////////////
    /////////////////////////////////////////////////////

    @Override
    public String getCommandName() {
        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
    }

    /**
     * Resolves the owner of the target router, falling back to the system account
     * when the router cannot be found.
     */
    @Override
    public long getEntityOwnerId() {
        VirtualRouter router = _entityMgr.findById(VirtualRouter.class, getRouterId());
        if (router != null) {
            return router.getAccountId();
        }
        return Account.ACCOUNT_ID_SYSTEM;
    }

    /**
     * Looks up the router, fetches its health check results through the query service
     * and sets them as the response object.
     *
     * @throws InvalidParameterValueException if no router with the VIRTUAL_ROUTER role exists for the given id
     * @throws ServerApiException if fetching the results fails with a {@link CloudRuntimeException}
     */
    @Override
    public void execute() throws ResourceUnavailableException, InvalidParameterValueException, ServerApiException {
        CallContext.current().setEventDetails("Router Id: " + this._uuidMgr.getUuid(VirtualMachine.class, getRouterId()));
        VirtualRouter router = _routerService.findRouter(getRouterId());
        if (router == null || router.getRole() != VirtualRouter.Role.VIRTUAL_ROUTER) {
            throw new InvalidParameterValueException("Can't find router by routerId");
        }

        try {
            List<RouterHealthCheckResultResponse> healthChecks = _queryService.listRouterHealthChecks(this);
            RouterHealthCheckResultsListResponse routerResponse = new RouterHealthCheckResultsListResponse();
            routerResponse.setRouterId(router.getUuid());
            routerResponse.setHealthChecks(healthChecks);
            routerResponse.setObjectName("routerhealthchecks");
            routerResponse.setResponseName(getCommandName());
            setResponseObject(routerResponse);
        } catch (CloudRuntimeException ex) {
            // Route the stack trace through the class logger instead of stderr so it
            // lands in the management server log with the usual context.
            s_logger.error("Failed to get health check results for router " + router.getUuid(), ex);
            throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to execute command due to exception: " + ex.getLocalizedMessage());
        }
    }
}

View File

@ -16,6 +16,7 @@
// under the License.
package org.apache.cloudstack.api.command.admin.router;
import org.apache.commons.lang.BooleanUtils;
import org.apache.log4j.Logger;
import org.apache.cloudstack.api.APICommand;
@ -80,6 +81,10 @@ public class ListRoutersCmd extends BaseListProjectAndAccountResourcesCmd {
@Parameter(name = ApiConstants.VERSION, type = CommandType.STRING, description = "list virtual router elements by version")
private String version;
@Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14",
description = "if true is passed for this parameter, also fetch last executed health check results for the router. Default is false")
private Boolean fetchHealthCheckResults;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
@ -132,6 +137,11 @@ public class ListRoutersCmd extends BaseListProjectAndAccountResourcesCmd {
return Role.VIRTUAL_ROUTER.toString();
}
/**
 * Tells whether the caller asked for health check results to be included.
 *
 * @return {@code true} only when the optional request parameter was supplied and is true;
 *         {@code false} when it is absent or false
 */
public boolean shouldFetchHealthCheckResults() {
    final Boolean requested = fetchHealthCheckResults;
    return BooleanUtils.isTrue(requested);
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////

View File

@ -96,7 +96,7 @@ public class RestartNetworkCmd extends BaseAsyncCmd {
@Override
public void execute() throws ResourceUnavailableException, ResourceAllocationException, ConcurrentOperationException, InsufficientCapacityException {
boolean result = _networkService.restartNetwork(this, getCleanup(), getMakeRedundant());
boolean result = _networkService.restartNetwork(this);
if (result) {
SuccessResponse response = new SuccessResponse(getCommandName());
setResponseObject(response);

View File

@ -91,7 +91,7 @@ public class RestartVPCCmd extends BaseAsyncCmd {
@Override
public void execute() {
try {
final boolean result = _vpcService.restartVpc(getId(), getCleanup(), getMakeredundant());
final boolean result = _vpcService.restartVpc(this);
if (result) {
final SuccessResponse response = new SuccessResponse(getCommandName());
setResponseObject(response);

View File

@ -18,10 +18,9 @@ package org.apache.cloudstack.api.response;
import java.util.Date;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;
import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.api.EntityReference;
@ -29,6 +28,7 @@ import org.apache.cloudstack.api.EntityReference;
import com.cloud.serializer.Param;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.State;
import com.google.gson.annotations.SerializedName;
@EntityReference(value = VirtualMachine.class)
@SuppressWarnings("unused")
@ -217,6 +217,14 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView
@Param(description = "true if the router template requires upgrader")
private boolean requiresUpgrade;
@SerializedName("healthchecksfailed")
@Param(description = "true if any health checks had failed")
private boolean healthChecksFailed;
@SerializedName("healthcheckresults")
@Param(description = "Last executed health check result for the router", responseObject = RouterHealthCheckResultResponse.class, since = "4.14")
List<RouterHealthCheckResultResponse> healthCheckResults;
public DomainRouterResponse() {
nics = new LinkedHashSet<NicResponse>();
}
@ -278,6 +286,14 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView
return hypervisor;
}
public List<RouterHealthCheckResultResponse> getHealthCheckResults() {
return healthCheckResults;
}
public boolean getHealthChecksFailed() {
return healthChecksFailed;
}
public void setHypervisor(String hypervisor) {
this.hypervisor = hypervisor;
}
@ -446,4 +462,12 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView
public void setRequiresUpgrade(boolean requiresUpgrade) {
this.requiresUpgrade = requiresUpgrade;
}
public void setHealthChecksFailed(boolean healthChecksFailed) {
this.healthChecksFailed = healthChecksFailed;
}
public void setHealthCheckResults(List<RouterHealthCheckResultResponse> healthCheckResults) {
this.healthCheckResults = healthCheckResults;
}
}

View File

@ -0,0 +1,88 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.response;
import java.util.Date;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
/**
 * API response object describing the outcome of one health check on a router.
 * It is a plain data holder: every field is exposed through a getter/setter pair
 * and serialized under the name given in its {@code @SerializedName} annotation.
 */
public class RouterHealthCheckResultResponse extends BaseResponse {
    @SerializedName(ApiConstants.ROUTER_CHECK_NAME)
    @Param(description = "the name of the health check on the router")
    private String checkName;

    @SerializedName(ApiConstants.ROUTER_CHECK_TYPE)
    @Param(description = "the type of the health check - basic or advanced")
    private String checkType;

    @SerializedName(ApiConstants.RESULT)
    @Param(description = "result of the health check")
    private boolean result;

    // The description previously read "the date this VPC was created", a copy-paste
    // leftover that did not describe this field.
    @SerializedName(ApiConstants.LAST_UPDATED)
    @Param(description = "the date this health check result was last updated")
    private Date lastUpdated;

    @SerializedName(ApiConstants.DETAILS)
    @Param(description = "detailed response generated on running health check")
    private String details;

    /** @return the name of the health check */
    public String getCheckName() {
        return checkName;
    }

    /** @return the type of the health check */
    public String getCheckType() {
        return checkType;
    }

    /** @return the boolean result of the health check */
    public boolean getResult() {
        return result;
    }

    /** @return the date the result was last updated */
    public Date getLastUpdated() {
        return lastUpdated;
    }

    /** @return the detailed output produced by the health check */
    public String getDetails() {
        return details;
    }

    /** @param checkName the name of the health check */
    public void setCheckName(String checkName) {
        this.checkName = checkName;
    }

    /** @param checkType the type of the health check */
    public void setCheckType(String checkType) {
        this.checkType = checkType;
    }

    /** @param result the boolean result of the health check */
    public void setResult(boolean result) {
        this.result = result;
    }

    /** @param lastUpdated the date the result was last updated */
    public void setLastUpdated(Date lastUpdated) {
        this.lastUpdated = lastUpdated;
    }

    /** @param details the detailed output produced by the health check */
    public void setDetails(String details) {
        this.details = details;
    }
}

View File

@ -0,0 +1,52 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.response;
import java.util.List;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
/**
 * API response object that pairs a router id with the list of its health check results.
 * It is a plain data holder with a getter/setter pair per field.
 */
public class RouterHealthCheckResultsListResponse extends BaseResponse {
    @SerializedName(ApiConstants.ROUTER_ID)
    @Param(description = "the id of the router")
    private String routerId;

    // The description previously duplicated "the id of the router", which did not
    // describe this field.
    @SerializedName(ApiConstants.ROUTER_HEALTH_CHECKS)
    @Param(description = "the health check results of the router", responseObject = RouterHealthCheckResultResponse.class)
    private List<RouterHealthCheckResultResponse> healthChecks;

    /** @return the id of the router as set on this response */
    public String getRouterId() {
        return routerId;
    }

    /** @return the health check results set on this response, or {@code null} if none were set */
    public List<RouterHealthCheckResultResponse> getHealthChecks() {
        return healthChecks;
    }

    /** @param routerId the id of the router to report */
    public void setRouterId(String routerId) {
        this.routerId = routerId;
    }

    /** @param healthChecks the health check results to report */
    public void setHealthChecks(List<RouterHealthCheckResultResponse> healthChecks) {
        this.healthChecks = healthChecks;
    }
}

View File

@ -24,6 +24,7 @@ import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd;
import org.apache.cloudstack.api.command.admin.host.ListHostsCmd;
import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd;
import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd;
import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd;
import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd;
import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd;
import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd;
@ -68,6 +69,7 @@ import org.apache.cloudstack.api.response.ProjectInvitationResponse;
import org.apache.cloudstack.api.response.ProjectResponse;
import org.apache.cloudstack.api.response.ResourceDetailResponse;
import org.apache.cloudstack.api.response.ResourceTagResponse;
import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.api.response.SecurityGroupResponse;
import org.apache.cloudstack.api.response.ServiceOfferingResponse;
import org.apache.cloudstack.api.response.StoragePoolResponse;
@ -172,4 +174,6 @@ public interface QueryService {
ListResponse<ManagementServerResponse> listManagementServers(ListMgmtsCmd cmd);
ListResponse<TemplateOVFPropertyResponse> listTemplateOVFProperties(ListTemplateOVFProperties cmd);
List<RouterHealthCheckResultResponse> listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd);
}

View File

@ -0,0 +1,46 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.agent.api.routing;
import java.util.List;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command;
/**
 * Answer carrying router monitoring output: the list of failing checks and the
 * monitoring results string. The monitoring results string is also passed to the
 * parent {@link Answer} as its details.
 */
public class GetRouterMonitorResultsAnswer extends Answer {
    private List<String> failingChecks;
    private String monitoringResults;

    /** No-argument constructor; leaves both fields unset. */
    protected GetRouterMonitorResultsAnswer() {
        super();
    }

    /**
     * @param cmd               the command this answer responds to
     * @param success           result flag forwarded to the parent answer
     * @param failingChecks     the checks reported as failing
     * @param monitoringResults the monitoring results, also forwarded to the parent as details
     */
    public GetRouterMonitorResultsAnswer(Command cmd, boolean success, List<String> failingChecks, String monitoringResults) {
        super(cmd, success, monitoringResults);
        this.monitoringResults = monitoringResults;
        this.failingChecks = failingChecks;
    }

    /** @return the failing checks given at construction, or {@code null} if never set */
    public List<String> getFailingChecks() {
        return this.failingChecks;
    }

    /** @return the monitoring results given at construction, or {@code null} if never set */
    public String getMonitoringResults() {
        return this.monitoringResults;
    }
}

View File

@ -0,0 +1,38 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.agent.api.routing;
/**
 * Network element command requesting the monitoring results of a router.
 * It carries a single flag saying whether fresh checks should be performed.
 */
public class GetRouterMonitorResultsCommand extends NetworkElementCommand {
    private boolean performFreshChecks;

    /** No-argument constructor; the flag keeps its default value of {@code false}. */
    protected GetRouterMonitorResultsCommand() {
    }

    /**
     * @param performFreshChecks whether fresh checks should be performed
     */
    public GetRouterMonitorResultsCommand(boolean performFreshChecks) {
        this.performFreshChecks = performFreshChecks;
    }

    /** Always reports this command as a query. */
    @Override
    public boolean isQuery() {
        return true;
    }

    /** @return the flag supplied at construction */
    public boolean shouldPerformFreshChecks() {
        return this.performFreshChecks;
    }
}

View File

@ -0,0 +1,41 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.agent.api.routing;
import java.util.HashMap;
import java.util.Map;
/**
 * Loads new and updates old configuration details on VR for health checks.
 *
 * <p>The details are held as a mutable key/value map that callers fill through
 * {@link #addDetail(String, String)}.</p>
 */
public class LoadRouterHealthChecksConfigCommand extends NetworkElementCommand {
    // Created once per instance, so the map is never null after construction.
    private Map<String, String> details = new HashMap<>();

    /** No-argument constructor; the command starts with an empty details map. */
    protected LoadRouterHealthChecksConfigCommand() {
    }

    /**
     * Stores a detail, replacing any previous value held under the same key.
     *
     * @param key   the detail name
     * @param value the detail value
     */
    public void addDetail(String key, String value) {
        details.put(key, value);
    }

    /** @return the live details map (not a copy) */
    public Map<String, String> getDetails() {
        return this.details;
    }
}

View File

@ -38,7 +38,6 @@ public abstract class NetworkElementCommand extends Command {
public static final String GUEST_BRIDGE = "guest.bridge";
public static final String VPC_PRIVATE_GATEWAY = "vpc.gateway.private";
public static final String FIREWALL_EGRESS_DEFAULT = "firewall.egress.default";
public static final String ROUTER_MONITORING_ENABLE = "router.monitor.enable";
public static final String NETWORK_PUB_LAST_IP = "network.public.last.ip";
private String routerAccessIp;

View File

@ -20,6 +20,9 @@
package com.cloud.agent.api.routing;
import java.util.List;
import java.util.Map;
import org.apache.commons.collections.CollectionUtils;
import com.cloud.agent.api.to.MonitorServiceTO;
@ -29,13 +32,24 @@ import com.cloud.agent.api.to.MonitorServiceTO;
* how to access the components inside the command.
*/
public class SetMonitorServiceCommand extends NetworkElementCommand {
MonitorServiceTO[] services;
public static final String ROUTER_MONITORING_ENABLED = "router.monitor.enabled";
public static final String ROUTER_HEALTH_CHECKS_ENABLED = "router.health.checks.enabled";
public static final String ROUTER_HEALTH_CHECKS_BASIC_INTERVAL = "router.health.checks.basic.interval";
public static final String ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL = "router.health.checks.advanced.interval";
public static final String ROUTER_HEALTH_CHECKS_EXCLUDED = "router.health.checks.excluded";
private MonitorServiceTO[] services;
private Map<String, String> healthChecksConfig;
private boolean reconfigureAfterUpdate;
private boolean deleteFromProcessedCache;
protected SetMonitorServiceCommand() {
}
public SetMonitorServiceCommand(List<MonitorServiceTO> services) {
this.services = services.toArray(new MonitorServiceTO[services.size()]);
if (CollectionUtils.isNotEmpty(services)) {
this.services = services.toArray(new MonitorServiceTO[services.size()]);
}
}
public MonitorServiceTO[] getRules() {
@ -43,7 +57,9 @@ public class SetMonitorServiceCommand extends NetworkElementCommand {
}
public String getConfiguration() {
if (services == null) {
return null;
}
StringBuilder sb = new StringBuilder();
for (MonitorServiceTO service : services) {
sb.append("[").append(service.getService()).append("]").append(":");
@ -55,4 +71,28 @@ public class SetMonitorServiceCommand extends NetworkElementCommand {
return sb.toString();
}
public Map<String, String> getHealthChecksConfig() {
return healthChecksConfig;
}
public void setHealthChecksConfig(Map<String, String> healthChecksConfig) {
this.healthChecksConfig = healthChecksConfig;
}
public boolean shouldReconfigureAfterUpdate() {
return reconfigureAfterUpdate;
}
public void setReconfigureAfterUpdate(boolean reconfigureAfterUpdate) {
this.reconfigureAfterUpdate = reconfigureAfterUpdate;
}
public boolean shouldDeleteFromProcessedCache() {
return deleteFromProcessedCache;
}
public void setDeleteFromProcessedCache(boolean deleteFromProcessedCache) {
this.deleteFromProcessedCache = deleteFromProcessedCache;
}
}

View File

@ -47,6 +47,8 @@ public class VRScripts {
// New scripts for use with chef
public static final String UPDATE_CONFIG = "update_config.py";
public static final String CONFIGURE = "configure.py";
// Script still in use - mostly by HyperV
public static final String S2SVPN_CHECK = "checkbatchs2svpn.sh";
@ -66,6 +68,7 @@ public class VRScripts {
public static final String VPC_STATIC_ROUTE = "vpc_staticroute.sh";
public static final String VPN_L2TP = "vpn_l2tp.sh";
public static final String UPDATE_HOST_PASSWD = "update_host_passwd.sh";
public static final String ROUTER_MONITOR_RESULTS = "getRouterMonitorResults.sh";
public static final String VR_CFG = "vr_cfg.sh";

View File

@ -22,13 +22,6 @@ package com.cloud.agent.resource.virtualnetwork;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.channels.SocketChannel;
import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand;
import org.apache.cloudstack.diagnostics.DiagnosticsAnswer;
import org.apache.cloudstack.diagnostics.DiagnosticsCommand;
import org.apache.cloudstack.diagnostics.PrepareFilesAnswer;
import org.apache.cloudstack.diagnostics.PrepareFilesCommand;
import org.joda.time.Duration;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
@ -45,8 +38,14 @@ import org.apache.cloudstack.ca.SetupCertificateAnswer;
import org.apache.cloudstack.ca.SetupCertificateCommand;
import org.apache.cloudstack.ca.SetupKeyStoreCommand;
import org.apache.cloudstack.ca.SetupKeystoreAnswer;
import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand;
import org.apache.cloudstack.diagnostics.DiagnosticsAnswer;
import org.apache.cloudstack.diagnostics.DiagnosticsCommand;
import org.apache.cloudstack.diagnostics.PrepareFilesAnswer;
import org.apache.cloudstack.diagnostics.PrepareFilesCommand;
import org.apache.cloudstack.utils.security.KeyStoreUtils;
import org.apache.log4j.Logger;
import org.joda.time.Duration;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckRouterAnswer;
@ -59,6 +58,8 @@ import com.cloud.agent.api.GetRouterAlertsAnswer;
import com.cloud.agent.api.routing.AggregationControlCommand;
import com.cloud.agent.api.routing.AggregationControlCommand.Action;
import com.cloud.agent.api.routing.GetRouterAlertsCommand;
import com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer;
import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand;
import com.cloud.agent.api.routing.GroupAnswer;
import com.cloud.agent.api.routing.NetworkElementCommand;
import com.cloud.agent.resource.virtualnetwork.facade.AbstractConfigItemFacade;
@ -204,6 +205,8 @@ public class VirtualRoutingResource {
return execute((PrepareFilesCommand) cmd);
} else if (cmd instanceof DeleteFileInVrCommand) {
return execute((DeleteFileInVrCommand)cmd);
} else if (cmd instanceof GetRouterMonitorResultsCommand) {
return execute((GetRouterMonitorResultsCommand)cmd);
} else {
s_logger.error("Unknown query command in VirtualRoutingResource!");
return Answer.createUnsupportedCommandAnswer(cmd);
@ -225,10 +228,7 @@ public class VirtualRoutingResource {
throw new CloudRuntimeException("Unable to apply unknown configitem of type " + c.getClass().getSimpleName());
}
private Answer applyConfig(NetworkElementCommand cmd, List<ConfigItem> cfg) {
if (cfg.isEmpty()) {
return new Answer(cmd, true, "Nothing to do");
}
@ -256,7 +256,6 @@ public class VirtualRoutingResource {
s_logger.warn("Expected " + cmd.getAnswersCount() + " answers while executing " + cmd.getClass().getSimpleName() + " but received " + results.size());
}
if (results.size() == 1) {
return new Answer(cmd, finalResult, results.get(0).getDetails());
} else {
@ -275,6 +274,60 @@ public class VirtualRoutingResource {
return new CheckS2SVpnConnectionsAnswer(cmd, result.isSuccess(), result.getDetails());
}
/**
 * Splits one comma-separated line into the names of failing checks.
 *
 * @param line a single line of script output holding zero or more comma-separated names
 * @return the trimmed, non-empty names in the order they appear; never {@code null}
 */
private List<String> getFailingChecks(String line) {
    final String[] tokens = line.split(",");
    final List<String> names = new ArrayList<>(tokens.length);
    for (int i = 0; i < tokens.length; i++) {
        final String name = tokens[i].trim();
        if (name.isEmpty()) {
            // Blank entries (e.g. from ",," or stray whitespace) carry no check name.
            continue;
        }
        names.add(name);
    }
    return names;
}
/**
 * Parses the textual output of the router monitor results script.
 * <p>
 * A line containing {@code FAILING CHECKS} switches to the failing-checks section and a
 * line containing {@code MONITOR RESULTS} switches to the monitor-results section; the
 * marker lines themselves are not treated as data. Lines seen before any marker are
 * logged and skipped.
 *
 * @param cmd             the command the answer is built for
 * @param executionResult raw script output
 * @return a successful answer holding the failing check names and the monitor results,
 *         the latter being the section's trimmed lines concatenated without separators
 */
private GetRouterMonitorResultsAnswer parseLinesForHealthChecks(GetRouterMonitorResultsCommand cmd, String executionResult) {
    final List<String> failed = new ArrayList<>();
    final StringBuilder monitorOutput = new StringBuilder();
    // At most one of these is ever true; both false means no section marker has been seen yet.
    boolean inFailingSection = false;
    boolean inMonitorSection = false;

    for (final String rawLine : executionResult.trim().split("\n")) {
        final String line = rawLine.trim();
        if (line.contains("FAILING CHECKS")) {
            inFailingSection = true;
            inMonitorSection = false;
            continue;
        }
        if (line.contains("MONITOR RESULTS")) {
            inFailingSection = false;
            inMonitorSection = true;
            continue;
        }

        if (inFailingSection) {
            failed.addAll(getFailingChecks(line));
        } else if (inMonitorSection) {
            monitorOutput.append(line);
        } else {
            s_logger.error("Unexpected lines reached while parsing health check response. Skipping line:- " + line);
        }
    }

    return new GetRouterMonitorResultsAnswer(cmd, true, failed, monitorOutput.toString());
}
/**
 * Runs the router monitor results script on the virtual router and converts its output
 * into an answer.
 *
 * @param cmd command naming the router (via its access details) and whether fresh checks
 *            should be performed
 * @return a failed answer when the script fails or returns no usable output, otherwise the
 *         parsed failing checks and monitor results
 */
private GetRouterMonitorResultsAnswer execute(GetRouterMonitorResultsCommand cmd) {
    String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
    String args = cmd.shouldPerformFreshChecks() ? "true" : "false";
    s_logger.info("Fetching health check result for " + routerIp + " and executing fresh checks: " + args);

    ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_MONITOR_RESULTS, args);
    String details = result.getDetails();
    if (!result.isSuccess()) {
        s_logger.warn("Result of " + cmd + " failed with details: " + details);
        return new GetRouterMonitorResultsAnswer(cmd, false, null, details);
    }

    // A successful run may still carry null or whitespace-only details; treat both as
    // "no results" instead of dereferencing null or reporting an empty parse as success.
    if (details == null || details.trim().isEmpty()) {
        s_logger.warn("Result of " + cmd + " received no details.");
        return new GetRouterMonitorResultsAnswer(cmd, false, null, "No results available.");
    }

    return parseLinesForHealthChecks(cmd, details);
}
private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) {
String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);

View File

@ -127,7 +127,10 @@ public abstract class AbstractConfigItemFacade {
final ConfigItem configFile = new FileConfigItem(VRScripts.CONFIG_PERSIST_LOCATION, remoteFilename, gson.toJson(configuration));
cfg.add(configFile);
final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, remoteFilename);
// By default keep files in processed cache on VR
final String args = configuration.shouldDeleteFromProcessedCache() ? remoteFilename + " false" : remoteFilename;
final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, args);
cfg.add(updateCommand);
return cfg;

View File

@ -21,21 +21,56 @@ package com.cloud.agent.resource.virtualnetwork.facade;
import java.util.List;
import org.apache.log4j.Logger;
import com.cloud.agent.api.routing.NetworkElementCommand;
import com.cloud.agent.api.routing.SetMonitorServiceCommand;
import com.cloud.agent.resource.virtualnetwork.ConfigItem;
import com.cloud.agent.resource.virtualnetwork.ScriptConfigItem;
import com.cloud.agent.resource.virtualnetwork.VRScripts;
import com.cloud.agent.resource.virtualnetwork.model.ConfigBase;
import com.cloud.agent.resource.virtualnetwork.model.MonitorService;
public class SetMonitorServiceConfigItem extends AbstractConfigItemFacade {
private static final Logger s_logger = Logger.getLogger(SetMonitorServiceConfigItem.class);
/**
 * Builds the config items that push the monitor service and health check settings to
 * the virtual router.
 *
 * @param cmd the command to translate; must be a {@link SetMonitorServiceCommand}
 * @return the generated config items, followed by a {@code configure.py} item for
 *         {@code monitor_service.json} when the command requests a reconfigure after
 *         the update; whatever {@code generateConfigItems} returns otherwise
 */
@Override
public List<ConfigItem> generateConfig(final NetworkElementCommand cmd) {
    final SetMonitorServiceCommand command = (SetMonitorServiceCommand) cmd;

    // Single model instance carrying both the monitoring and the health check switches.
    final MonitorService monitorService = new MonitorService(
            command.getConfiguration(),
            cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_MONITORING_ENABLED),
            cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED));
    setupHealthChecksRelatedInfo(monitorService, command);
    monitorService.setDeleteFromProcessedCache(command.shouldDeleteFromProcessedCache());

    List<ConfigItem> configItems = generateConfigItems(monitorService);
    if (configItems != null && command.shouldReconfigureAfterUpdate()) {
        // Apply the freshly written monitor service config right away.
        configItems.add(new ScriptConfigItem(VRScripts.CONFIGURE, "monitor_service.json"));
    }
    return configItems;
}
/**
 * Copies the health check settings from the command onto the monitor service model.
 * <p>
 * The basic and advanced run intervals are read from the command's access details. A
 * missing or non-numeric value is logged and the corresponding interval is left unset
 * on the model ({@link Integer#parseInt(String)} rejects {@code null} with a
 * {@link NumberFormatException}, so both cases take the same path).
 *
 * @param monitorService model to populate
 * @param command        command carrying the access details and health checks config
 */
private void setupHealthChecksRelatedInfo(MonitorService monitorService, SetMonitorServiceCommand command) {
    final String basicInterval = command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL);
    try {
        monitorService.setHealthChecksBasicRunInterval(Integer.parseInt(basicInterval));
    } catch (NumberFormatException exception) {
        s_logger.error("Unexpected health check basic interval set: " + basicInterval +
                ". Exception: " + exception + ". Will use default value");
    }

    final String advancedInterval = command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL);
    try {
        monitorService.setHealthChecksAdvancedRunInterval(Integer.parseInt(advancedInterval));
    } catch (NumberFormatException exception) {
        s_logger.error("Unexpected health check advanced interval set: " + advancedInterval +
                ". Exception: " + exception + ". Will use default value");
    }

    monitorService.setExcludedHealthChecks(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED));
    monitorService.setHealthChecksConfig(command.getHealthChecksConfig());
}
@Override

View File

@ -41,6 +41,10 @@ public abstract class ConfigBase {
private String type = UNKNOWN;
// For use in update_config.py which by default persists files in /var/cache/cloud/processed
// If true we don't keep the file in cache. Useful for monitor service command to avoid space waste
protected boolean deleteFromProcessedCache;
private ConfigBase() {
// Empty constructor for (de)serialization
}
@ -57,4 +61,7 @@ public abstract class ConfigBase {
this.type = type;
}
/**
 * @return the value of the {@code deleteFromProcessedCache} field; {@code false}
 *         unless a subclass has set it
 */
public boolean shouldDeleteFromProcessedCache() {
return deleteFromProcessedCache;
}
}

View File

@ -19,34 +19,84 @@
package com.cloud.agent.resource.virtualnetwork.model;
import java.util.Map;
public class MonitorService extends ConfigBase {
public String config, disableMonitoring;
public Boolean healthChecksEnabled;
public Integer healthChecksBasicRunInterval;
public Integer healthChecksAdvancedRunInterval;
public String excludedHealthChecks;
public Map<String, String> healthChecksConfig;
public MonitorService() {
super(ConfigBase.MONITORSERVICE);
}
public MonitorService(String config, String disableMonitoring) {
public MonitorService(String config, String disableMonitoring, String healthChecksEnabled) {
super(ConfigBase.MONITORSERVICE);
this.config = config;
this.disableMonitoring = disableMonitoring;
this.healthChecksEnabled = Boolean.parseBoolean(healthChecksEnabled);
}
public String getConfig() {
return config;
}
public void setConfig(String config) {
this.config = config;
}
public String getDisableMonitoring() {
return disableMonitoring;
}
public Boolean getHealthChecksEnabled() {
return healthChecksEnabled;
}
public Integer getHealthChecksBasicRunInterval() {
return healthChecksBasicRunInterval;
}
public Integer getHealthChecksAdvancedRunInterval() {
return healthChecksAdvancedRunInterval;
}
public String getExcludedHealthChecks() {
return excludedHealthChecks;
}
public Map<String, String> getHealthChecksConfig() {
return healthChecksConfig;
}
public void setConfig(String config) {
this.config = config;
}
public void setDisableMonitoring(String disableMonitoring) {
this.disableMonitoring = disableMonitoring;
}
public void setHealthChecksEnabled(Boolean healthChecksEnabled) {
this.healthChecksEnabled = healthChecksEnabled;
}
public void setHealthChecksBasicRunInterval(Integer healthChecksBasicRunInterval) {
this.healthChecksBasicRunInterval = healthChecksBasicRunInterval;
}
public void setHealthChecksAdvancedRunInterval(Integer healthChecksAdvancedRunInterval) {
this.healthChecksAdvancedRunInterval = healthChecksAdvancedRunInterval;
}
public void setExcludedHealthChecks(String excludedHealthChecks) {
this.excludedHealthChecks = excludedHealthChecks;
}
public void setHealthChecksConfig(Map<String, String> healthChecksConfig) {
this.healthChecksConfig = healthChecksConfig;
}
public void setDeleteFromProcessedCache(boolean deleteFromProcessedCache) {
this.deleteFromProcessedCache = deleteFromProcessedCache;
}
}

View File

@ -30,7 +30,6 @@ import java.util.concurrent.ExecutionException;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.storage.VolumeApiService;
import org.apache.cloudstack.api.command.admin.vm.MigrateVMCmd;
import org.apache.cloudstack.api.command.admin.volume.MigrateVolumeCmdByAdmin;
import org.apache.cloudstack.api.command.user.volume.MigrateVolumeCmd;
@ -106,6 +105,7 @@ import com.cloud.storage.StoragePool;
import com.cloud.storage.VMTemplateStorageResourceAssoc;
import com.cloud.storage.Volume;
import com.cloud.storage.Volume.Type;
import com.cloud.storage.VolumeApiService;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.SnapshotDao;
import com.cloud.storage.dao.VolumeDao;

View File

@ -0,0 +1,49 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.network.dao;
import java.util.List;
import com.cloud.utils.db.GenericDao;
/**
 * Data access for the stored health check results of virtual routers.
 */
public interface RouterHealthCheckResultDao extends GenericDao<RouterHealthCheckResultVO, Long> {

    /**
     * Lists every stored health check result of a router.
     *
     * @param routerId id of the router
     * @return all health check results in the database for that router
     */
    List<RouterHealthCheckResultVO> getHealthCheckResults(long routerId);

    /**
     * Removes the stored health check results of a router.
     *
     * @param routerId id of the router
     * @return true if at least one result was removed
     */
    boolean expungeHealthChecks(long routerId);

    /**
     * Tells whether any stored check of a router is marked as failed.
     *
     * @param routerId id of the router
     * @return true if there are checks that have been marked failed in the database
     */
    boolean hasFailingChecks(long routerId);

    /**
     * Looks up the stored result of one particular check. Only the most recent result
     * is kept per (check name, check type) for a router, so at most one entry is expected.
     *
     * @param routerId  id of the router
     * @param checkName name of the check
     * @param checkType type of the check
     * @return the result for that router, check name and check type, or null if none is stored
     */
    RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType);
}

View File

@ -0,0 +1,84 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.network.dao;
import java.util.List;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
/**
 * Database-backed implementation of {@link RouterHealthCheckResultDao}.
 */
@Component
public class RouterHealthCheckResultDaoImpl extends GenericDaoBase<RouterHealthCheckResultVO, Long> implements RouterHealthCheckResultDao {
    private static final Logger s_logger = Logger.getLogger(RouterHealthCheckResultDaoImpl.class);

    // Built once in the constructor and only read afterwards.
    private final SearchBuilder<RouterHealthCheckResultVO> routerChecksSearchBuilder;
    private final SearchBuilder<RouterHealthCheckResultVO> isRouterFailingSearchBuilder;

    protected RouterHealthCheckResultDaoImpl() {
        super();
        routerChecksSearchBuilder = createSearchBuilder();
        routerChecksSearchBuilder.and("routerId", routerChecksSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ);
        routerChecksSearchBuilder.and("checkName", routerChecksSearchBuilder.entity().getCheckName(), SearchCriteria.Op.EQ);
        routerChecksSearchBuilder.and("checkType", routerChecksSearchBuilder.entity().getCheckType(), SearchCriteria.Op.EQ);
        routerChecksSearchBuilder.done();

        isRouterFailingSearchBuilder = createSearchBuilder();
        isRouterFailingSearchBuilder.and("routerId", isRouterFailingSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ);
        isRouterFailingSearchBuilder.and("checkResult", isRouterFailingSearchBuilder.entity().getCheckResult(), SearchCriteria.Op.EQ);
        isRouterFailingSearchBuilder.done();
    }

    /**
     * @param routerId id of the router
     * @return the rows matching the router id; only the {@code routerId} parameter is set on the criteria
     */
    @Override
    public List<RouterHealthCheckResultVO> getHealthCheckResults(long routerId) {
        SearchCriteria<RouterHealthCheckResultVO> sc = routerChecksSearchBuilder.create();
        sc.setParameters("routerId", routerId);
        return listBy(sc);
    }

    /**
     * @param routerId id of the router
     * @return true if the expunge call reports more than zero rows
     */
    @Override
    public boolean expungeHealthChecks(long routerId) {
        SearchCriteria<RouterHealthCheckResultVO> sc = routerChecksSearchBuilder.create();
        sc.setParameters("routerId", routerId);
        return expunge(sc) > 0;
    }

    /**
     * @param routerId  id of the router
     * @param checkName name of the check
     * @param checkType type of the check
     * @return the first matching row, or null when none exists; more than one match is
     *         logged as an error
     */
    @Override
    public RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType) {
        SearchCriteria<RouterHealthCheckResultVO> sc = routerChecksSearchBuilder.create();
        sc.setParameters("routerId", routerId);
        sc.setParameters("checkName", checkName);
        sc.setParameters("checkType", checkType);
        List<RouterHealthCheckResultVO> checks = listBy(sc);
        if (checks.size() > 1) {
            // The lookup key is (router, name, type), so report all three parts.
            s_logger.error("Found multiple entries for router Id: " + routerId + ", check name: " + checkName + ", check type: " + checkType);
        }
        return checks.isEmpty() ? null : checks.get(0);
    }

    /**
     * @param routerId id of the router
     * @return true if at least one row for the router has {@code checkResult} false
     */
    @Override
    public boolean hasFailingChecks(long routerId) {
        SearchCriteria<RouterHealthCheckResultVO> sc = isRouterFailingSearchBuilder.create();
        sc.setParameters("routerId", routerId);
        sc.setParameters("checkResult", false);
        return !listBy(sc).isEmpty();
    }
}

View File

@ -0,0 +1,129 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.network.dao;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import com.cloud.network.RouterHealthCheckResult;
import com.cloud.utils.StringUtils;
/**
 * Row of the {@code router_health_check} table: the stored result of one health check
 * (identified by name and type) for one router.
 */
@Entity
@Table(name = "router_health_check")
public class RouterHealthCheckResultVO implements RouterHealthCheckResult {
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", updatable = false, nullable = false)
    private long id;

    @Column(name = "router_id", updatable = false, nullable = false)
    private long routerId;

    @Column(name = "check_name", updatable = false, nullable = false)
    private String checkName;

    @Column(name = "check_type", updatable = false, nullable = false)
    private String checkType;

    @Column(name = "check_result")
    private boolean checkResult;

    @Temporal(TemporalType.TIMESTAMP)
    @Column(name = "last_update", updatable = true, nullable = true)
    private Date lastUpdateTime;

    // Raw bytes of the detail message; decoded on demand by getParsedCheckDetails().
    @Column(name = "check_details", updatable = true, nullable = true)
    private byte[] checkDetails;

    protected RouterHealthCheckResultVO() {
    }

    /**
     * @param routerId  id of the router the check belongs to
     * @param checkName name of the check
     * @param checkType type of the check
     */
    public RouterHealthCheckResultVO(long routerId, String checkName, String checkType) {
        this.routerId = routerId;
        this.checkName = checkName;
        this.checkType = checkType;
    }

    public long getId() {
        return id;
    }

    @Override
    public long getRouterId() {
        return routerId;
    }

    @Override
    public String getCheckName() {
        return checkName;
    }

    @Override
    public String getCheckType() {
        return checkType;
    }

    @Override
    public boolean getCheckResult() {
        return checkResult;
    }

    @Override
    public Date getLastUpdateTime() {
        return lastUpdateTime;
    }

    /**
     * @return the detail bytes decoded with the preferred charset, or an empty string
     *         when no details are stored
     */
    @Override
    public String getParsedCheckDetails() {
        if (checkDetails == null) {
            return "";
        }
        return new String(checkDetails, StringUtils.getPreferredCharset());
    }

    public byte[] getCheckDetails() {
        return checkDetails;
    }

    public void setCheckResult(boolean checkResult) {
        this.checkResult = checkResult;
    }

    public void setLastUpdateTime(Date lastUpdateTime) {
        this.lastUpdateTime = lastUpdateTime;
    }

    public void setCheckDetails(byte[] checkDetails) {
        this.checkDetails = checkDetails;
    }

    @Override
    public String toString() {
        final StringBuilder text = new StringBuilder(super.toString());
        text.append("- check type: ").append(checkType);
        text.append(",check name: ").append(checkName);
        text.append(", check result: ").append(checkResult);
        text.append(", check last update: ").append(lastUpdateTime);
        text.append(", details: ").append(getParsedCheckDetails());
        return text.toString();
    }
}

View File

@ -288,4 +288,5 @@
<bean id="directDownloadCertificateDaoImpl" class="org.apache.cloudstack.direct.download.DirectDownloadCertificateDaoImpl" />
<bean id="directDownloadCertificateHostMapDaoImpl" class="org.apache.cloudstack.direct.download.DirectDownloadCertificateHostMapDaoImpl" />
<bean id="templateOVFPropertiesDaoImpl" class="com.cloud.storage.dao.TemplateOVFPropertiesDaoImpl" />
<bean id="routerHealthCheckResultsDaoImpl" class="com.cloud.network.dao.RouterHealthCheckResultDaoImpl" />
</beans>

View File

@ -36,3 +36,18 @@ UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=283 AND display_name=
UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=284 AND display_name="Red Hat Enterprise Linux 7.5";
UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=285 AND display_name="Red Hat Enterprise Linux 7.6";
UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=286 AND display_name="Red Hat Enterprise Linux 8.0";
-- Create table for router health checks. Only the most recent result of each check is stored:
-- the UNIQUE key below allows one row per (router_id, check_name, check_type).
-- Rows are removed together with the referenced domain_router row (ON DELETE CASCADE).
CREATE TABLE `cloud`.`router_health_check` (
`id` bigint unsigned NOT NULL auto_increment,
`router_id` bigint unsigned NOT NULL COMMENT 'router id',
`check_name` varchar(255) NOT NULL COMMENT 'name of the health check',
`check_type` varchar(255) NOT NULL COMMENT 'type of the health check',
`last_update` DATETIME NULL COMMENT 'last check update time',
`check_result` boolean NOT NULL COMMENT 'check executions success or failure',
`check_details` BLOB NULL COMMENT 'check result detailed message',
PRIMARY KEY (`id`),
CONSTRAINT `fk_router_health_checks__router_id` FOREIGN KEY (`router_id`) REFERENCES `domain_router`(`id`) ON DELETE CASCADE,
UNIQUE `i_router_health_checks__router_id__check_name__check_type`(`router_id`, `check_name`, `check_type`),
INDEX `i_router_health_checks__router_id`(`router_id`)
) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8;

View File

@ -22,6 +22,17 @@
// Automatically generated by addcopyright.py at 04/03/2012
package com.cloud.baremetal.networkservice;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import javax.naming.ConfigurationException;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.log4j.Logger;
import com.cloud.agent.IAgentControl;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckNetworkAnswer;
@ -70,15 +81,6 @@ import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.PowerState;
import com.cloud.vm.dao.VMInstanceDao;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.log4j.Logger;
import javax.naming.ConfigurationException;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.TimeUnit;
public class BareMetalResourceBase extends ManagerBase implements ServerResource {
private static final Logger s_logger = Logger.getLogger(BareMetalResourceBase.class);

View File

@ -2085,6 +2085,11 @@ public class HypervDirectConnectResource extends ServerResourceBase implements S
final String controlIp = getRouterSshControlIp(cmd);
final String config = cmd.getConfiguration();
if (org.apache.commons.lang.StringUtils.isBlank(config)) {
s_logger.error("SetMonitorServiceCommand should have config for this case");
return new Answer(cmd, false, "SetMonitorServiceCommand failed due to missing config");
}
final String args = String.format(" %s %s", "-c", config);
final String command = String.format("%s%s %s", "/opt/cloud/bin/", VRScripts.MONITOR_SERVICE, args);

View File

@ -24,30 +24,6 @@ import java.util.UUID;
import javax.inject.Inject;
import junit.framework.TestCase;
import net.juniper.contrail.api.ApiConnector;
import net.juniper.contrail.api.ApiConnectorFactory;
import net.juniper.contrail.api.ApiConnectorMock;
import net.juniper.contrail.api.types.InstanceIp;
import net.juniper.contrail.api.types.NetworkIpam;
import net.juniper.contrail.api.types.Project;
import net.juniper.contrail.api.types.SubnetType;
import net.juniper.contrail.api.types.VirtualMachine;
import net.juniper.contrail.api.types.VirtualMachineInterface;
import net.juniper.contrail.api.types.VirtualNetwork;
import net.juniper.contrail.api.types.VnSubnetsType;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.command.user.address.AssociateIPAddrCmd;
@ -58,6 +34,16 @@ import org.apache.cloudstack.api.command.user.project.CreateProjectCmd;
import org.apache.cloudstack.api.command.user.project.DeleteProjectCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.context.support.AbstractApplicationContext;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import com.cloud.agent.AgentManager;
import com.cloud.dc.DataCenter;
@ -84,6 +70,19 @@ import com.cloud.utils.db.SearchCriteria.Op;
import com.cloud.utils.mgmt.JmxUtil;
import com.cloud.vm.VirtualMachineManager;
import junit.framework.TestCase;
import net.juniper.contrail.api.ApiConnector;
import net.juniper.contrail.api.ApiConnectorFactory;
import net.juniper.contrail.api.ApiConnectorMock;
import net.juniper.contrail.api.types.InstanceIp;
import net.juniper.contrail.api.types.NetworkIpam;
import net.juniper.contrail.api.types.Project;
import net.juniper.contrail.api.types.SubnetType;
import net.juniper.contrail.api.types.VirtualMachine;
import net.juniper.contrail.api.types.VirtualMachineInterface;
import net.juniper.contrail.api.types.VirtualNetwork;
import net.juniper.contrail.api.types.VnSubnetsType;
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = "classpath:/providerContext.xml")
/**

View File

@ -18,6 +18,23 @@
*/
package org.apache.cloudstack.storage.datastore.provider;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.inject.Inject;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.storage.datastore.util.DateraObject;
import org.apache.cloudstack.storage.datastore.util.DateraUtil;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.ModifyStoragePoolAnswer;
@ -41,21 +58,6 @@ import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.dao.VMInstanceDao;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.storage.datastore.util.DateraObject;
import org.apache.cloudstack.storage.datastore.util.DateraUtil;
import org.apache.log4j.Logger;
import javax.inject.Inject;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class DateraHostListener implements HypervisorHostListener {
private static final Logger s_logger = Logger.getLogger(DateraHostListener.class);

View File

@ -24,6 +24,33 @@ import java.util.Map;
import javax.inject.Inject;
import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult;
import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult;
import org.apache.cloudstack.engine.subsystem.api.storage.DataObject;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine;
import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver;
import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory;
import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo;
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
import org.apache.cloudstack.storage.command.CommandResult;
import org.apache.cloudstack.storage.command.CreateObjectAnswer;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.storage.datastore.util.SolidFireUtil;
import org.apache.cloudstack.storage.to.SnapshotObjectTO;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.to.DataObjectType;
import com.cloud.agent.api.to.DataStoreTO;
@ -39,12 +66,12 @@ import com.cloud.storage.DataStoreRole;
import com.cloud.storage.ResizeVolumePayload;
import com.cloud.storage.Snapshot.State;
import com.cloud.storage.SnapshotVO;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.storage.StoragePool;
import com.cloud.storage.VMTemplateStoragePoolVO;
import com.cloud.storage.Volume;
import com.cloud.storage.VolumeDetailVO;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.storage.dao.SnapshotDao;
import com.cloud.storage.dao.SnapshotDetailsDao;
import com.cloud.storage.dao.SnapshotDetailsVO;
@ -57,36 +84,8 @@ import com.cloud.user.AccountVO;
import com.cloud.user.dao.AccountDao;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.exception.CloudRuntimeException;
import com.google.common.base.Preconditions;
import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult;
import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult;
import org.apache.cloudstack.engine.subsystem.api.storage.DataObject;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver;
import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory;
import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo;
import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine;
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
import org.apache.cloudstack.storage.command.CommandResult;
import org.apache.cloudstack.storage.command.CreateObjectAnswer;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO;
import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.storage.datastore.util.SolidFireUtil;
import org.apache.cloudstack.storage.to.SnapshotObjectTO;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
public class SolidFirePrimaryDataStoreDriver implements PrimaryDataStoreDriver {
private static final Logger LOGGER = Logger.getLogger(SolidFirePrimaryDataStoreDriver.class);
private static final int LOWEST_HYPERVISOR_SNAPSHOT_RESERVE = 10;

View File

@ -31,8 +31,6 @@ import java.util.stream.Collectors;
import javax.inject.Inject;
import com.cloud.vm.snapshot.VMSnapshotVO;
import com.cloud.vm.snapshot.dao.VMSnapshotDao;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.affinity.AffinityGroup;
@ -63,6 +61,7 @@ import org.apache.cloudstack.api.response.CreateCmdResponse;
import org.apache.cloudstack.api.response.CreateSSHKeyPairResponse;
import org.apache.cloudstack.api.response.DiskOfferingResponse;
import org.apache.cloudstack.api.response.DomainResponse;
import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.api.response.DomainRouterResponse;
import org.apache.cloudstack.api.response.EventResponse;
import org.apache.cloudstack.api.response.ExtractResponse;
@ -235,6 +234,7 @@ import com.cloud.network.PhysicalNetwork;
import com.cloud.network.PhysicalNetworkServiceProvider;
import com.cloud.network.PhysicalNetworkTrafficType;
import com.cloud.network.RemoteAccessVpn;
import com.cloud.network.RouterHealthCheckResult;
import com.cloud.network.Site2SiteCustomerGateway;
import com.cloud.network.Site2SiteVpnConnection;
import com.cloud.network.Site2SiteVpnGateway;
@ -336,6 +336,8 @@ import com.cloud.vm.VirtualMachine.Type;
import com.cloud.vm.dao.NicExtraDhcpOptionDao;
import com.cloud.vm.dao.NicSecondaryIpVO;
import com.cloud.vm.snapshot.VMSnapshot;
import com.cloud.vm.snapshot.VMSnapshotVO;
import com.cloud.vm.snapshot.dao.VMSnapshotDao;
public class ApiResponseHelper implements ResponseGenerator {
@ -1349,6 +1351,7 @@ public class ApiResponseHelper implements ResponseGenerator {
return listVrs.get(0);
}
@Override
public SystemVmResponse createSystemVmResponse(VirtualMachine vm) {
SystemVmResponse vmResponse = new SystemVmResponse();
@ -4205,4 +4208,20 @@ public class ApiResponseHelper implements ResponseGenerator {
response.setState(mgmt.getState());
return response;
}
/**
 * Converts router health check results into their API response representation.
 *
 * @param router the router the results belong to; this method does not read it
 * @param healthCheckResults the results to convert; must not be null
 * @return a new list with one response per result, in the iteration order of the input
 */
@Override
public List<RouterHealthCheckResultResponse> createHealthCheckResponse(VirtualMachine router, List<RouterHealthCheckResult> healthCheckResults) {
    final List<RouterHealthCheckResultResponse> converted = new ArrayList<>(healthCheckResults.size());
    for (final RouterHealthCheckResult check : healthCheckResults) {
        converted.add(newRouterHealthCheckResultResponse(check));
    }
    return converted;
}

/** Copies the fields of a single health check result into a fresh response object. */
private static RouterHealthCheckResultResponse newRouterHealthCheckResultResponse(RouterHealthCheckResult check) {
    final RouterHealthCheckResultResponse entry = new RouterHealthCheckResultResponse();
    entry.setObjectName("routerhealthchecks");
    entry.setCheckName(check.getCheckName());
    entry.setCheckType(check.getCheckType());
    entry.setResult(check.getCheckResult());
    entry.setLastUpdated(check.getLastUpdateTime());
    entry.setDetails(check.getParsedCheckDetails());
    return entry;
}
}

View File

@ -31,9 +31,6 @@ import java.util.stream.Stream;
import javax.inject.Inject;
import com.cloud.agent.api.storage.OVFProperty;
import com.cloud.storage.TemplateOVFPropertyVO;
import com.cloud.storage.dao.TemplateOVFPropertiesDao;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO;
import org.apache.cloudstack.affinity.AffinityGroupResponse;
@ -42,6 +39,7 @@ import org.apache.cloudstack.affinity.dao.AffinityGroupDomainMapDao;
import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao;
import org.apache.cloudstack.api.BaseListProjectAndAccountResourcesCmd;
import org.apache.cloudstack.api.ResourceDetail;
import org.apache.cloudstack.api.ResponseGenerator;
import org.apache.cloudstack.api.ResponseObject.ResponseView;
import org.apache.cloudstack.api.command.admin.account.ListAccountsCmdByAdmin;
import org.apache.cloudstack.api.command.admin.domain.ListDomainsCmd;
@ -51,6 +49,7 @@ import org.apache.cloudstack.api.command.admin.host.ListHostsCmd;
import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd;
import org.apache.cloudstack.api.command.admin.iso.ListIsosCmdByAdmin;
import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd;
import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd;
import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd;
import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd;
import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd;
@ -98,6 +97,7 @@ import org.apache.cloudstack.api.response.ProjectInvitationResponse;
import org.apache.cloudstack.api.response.ProjectResponse;
import org.apache.cloudstack.api.response.ResourceDetailResponse;
import org.apache.cloudstack.api.response.ResourceTagResponse;
import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse;
import org.apache.cloudstack.api.response.SecurityGroupResponse;
import org.apache.cloudstack.api.response.ServiceOfferingResponse;
import org.apache.cloudstack.api.response.StoragePoolResponse;
@ -123,6 +123,7 @@ import org.apache.commons.collections.CollectionUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.cloud.agent.api.storage.OVFProperty;
import com.cloud.api.query.dao.AccountJoinDao;
import com.cloud.api.query.dao.AffinityGroupJoinDao;
import com.cloud.api.query.dao.AsyncJobJoinDao;
@ -182,6 +183,10 @@ import com.cloud.exception.PermissionDeniedException;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.network.RouterHealthCheckResult;
import com.cloud.network.VpcVirtualNetworkApplianceService;
import com.cloud.network.dao.RouterHealthCheckResultDao;
import com.cloud.network.router.VirtualNetworkApplianceManager;
import com.cloud.network.security.SecurityGroupVMMapVO;
import com.cloud.network.security.dao.SecurityGroupVMMapDao;
import com.cloud.org.Grouping;
@ -206,9 +211,11 @@ import com.cloud.storage.Storage;
import com.cloud.storage.Storage.ImageFormat;
import com.cloud.storage.Storage.TemplateType;
import com.cloud.storage.StoragePoolTagVO;
import com.cloud.storage.TemplateOVFPropertyVO;
import com.cloud.storage.VMTemplateVO;
import com.cloud.storage.Volume;
import com.cloud.storage.dao.StoragePoolTagsDao;
import com.cloud.storage.dao.TemplateOVFPropertiesDao;
import com.cloud.storage.dao.VMTemplateDao;
import com.cloud.tags.ResourceTagVO;
import com.cloud.tags.dao.ResourceTagDao;
@ -395,6 +402,15 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
@Inject
TemplateOVFPropertiesDao templateOVFPropertiesDao;
@Inject
public VpcVirtualNetworkApplianceService routerService;
@Inject
private ResponseGenerator responseGenerator;
@Inject
private RouterHealthCheckResultDao routerHealthCheckResultDao;
/*
* (non-Javadoc)
*
@ -1198,8 +1214,17 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
Pair<List<DomainRouterJoinVO>, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), cmd.getClusterId(),
cmd.getHostId(), cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), cmd.getVersion());
ListResponse<DomainRouterResponse> response = new ListResponse<DomainRouterResponse>();
List<DomainRouterResponse> routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()]));
if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) {
for (DomainRouterResponse res : routerResponses) {
DomainRouterVO resRouter = _routerDao.findByUuid(res.getId());
res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId()));
if (cmd.shouldFetchHealthCheckResults()) {
res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter,
new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId()))));
}
}
}
response.setResponses(routerResponses, result.second());
return response;
}
@ -1209,8 +1234,18 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
Pair<List<DomainRouterJoinVO>, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), null, cmd.getHostId(),
cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), null);
ListResponse<DomainRouterResponse> response = new ListResponse<DomainRouterResponse>();
List<DomainRouterResponse> routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()]));
if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) {
for (DomainRouterResponse res : routerResponses) {
DomainRouterVO resRouter = _routerDao.findByUuid(res.getId());
res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId()));
if (cmd.shouldFetchHealthCheckResults()) {
res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter,
new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId()))));
}
}
}
response.setResponses(routerResponses, result.second());
return response;
}
@ -3928,6 +3963,27 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
return response;
}
/**
 * Returns the stored health check results of a router, optionally after running fresh checks.
 *
 * @param cmd the API command carrying the router id and the "perform fresh checks" flag
 * @return one response object per stored health check result of the router
 * @throws CloudRuntimeException if router health checks are disabled, if the requested fresh
 *         checks could not be performed, or if no results are stored for the router
 */
@Override
public List<RouterHealthCheckResultResponse> listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd) {
    s_logger.info("Executing health check command " + cmd);
    long routerId = cmd.getRouterId();
    if (!VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) {
        throw new CloudRuntimeException("Router health checks are not enabled for router " + routerId);
    }

    if (cmd.shouldPerformFreshChecks() && !routerService.performRouterHealthChecks(routerId)) {
        throw new CloudRuntimeException("Unable to perform fresh checks on router.");
    }

    // Validate the DAO result before copying it. Copying first made the null check dead code:
    // new ArrayList<>(null) throws a NullPointerException instead of yielding a null list.
    java.util.Collection<? extends RouterHealthCheckResult> stored = routerHealthCheckResultDao.getHealthCheckResults(routerId);
    if (stored == null || stored.isEmpty()) {
        throw new CloudRuntimeException("Database had no entries for health checks for router. This could happen for " +
                "a newly created router. Please wait for periodic results to populate or manually call for checks to execute.");
    }

    List<RouterHealthCheckResult> result = new ArrayList<RouterHealthCheckResult>(stored);
    return responseGenerator.createHealthCheckResponse(_routerDao.findById(routerId), result);
}
@Override
public String getConfigComponentName() {
return QueryService.class.getSimpleName();

View File

@ -1861,14 +1861,7 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
@Override
@ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true)
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
// This method restarts all network elements belonging to the network and re-applies all the rules
Long networkId = cmd.getNetworkId();
User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId());
Account callerAccount = _accountMgr.getActiveAccountById(callerUser.getAccountId());
// Check if network exists
public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
NetworkVO network = _networksDao.findById(networkId);
if (network == null) {
throwInvalidIdException("Network with specified id doesn't exist", networkId.toString(), "networkId");
@ -1888,8 +1881,8 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
throw new InvalidParameterException("Unable to restart a running SDN network.");
}
Account callerAccount = _accountMgr.getActiveAccountById(user.getAccountId());
_accountMgr.checkAccess(callerAccount, null, true, network);
if (!network.isRedundant() && makeRedundant) {
network.setRedundant(true);
if (!_networksDao.update(network.getId(), network)) {
@ -1898,8 +1891,7 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
cleanup = true;
}
boolean success = _networkMgr.restartNetwork(networkId, callerAccount, callerUser, cleanup);
boolean success = _networkMgr.restartNetwork(networkId, callerAccount, user, cleanup);
if (success) {
s_logger.debug("Network id=" + networkId + " is restarted successfully.");
} else {
@ -1909,6 +1901,17 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService {
return success;
}
/**
 * API entry point for restarting a network.
 *
 * Reads the network id and the cleanup / make-redundant flags from the command, resolves the
 * calling user from the current {@link CallContext}, and delegates to
 * {@code restartNetwork(Long, boolean, boolean, User)}.
 *
 * @param cmd the restart-network API command
 * @return the result of the delegated restart call
 */
@Override
@ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true)
public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
    // Arguments are evaluated left to right, so the command is read before the calling user is looked up.
    return restartNetwork(
            cmd.getNetworkId(),
            cmd.getCleanup(),
            cmd.getMakeRedundant(),
            _accountMgr.getActiveUser(CallContext.current().getCallingUserId()));
}
@Override
public int getActiveNicsInNetwork(long networkId) {
return _networksDao.getActiveNicsIn(networkId);

View File

@ -18,23 +18,21 @@ package com.cloud.network.firewall;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Collections;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import com.cloud.network.dao.FirewallRulesDcidrsDao;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import org.apache.cloudstack.api.command.user.firewall.IListFirewallRulesCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.cloud.configuration.Config;
import com.cloud.domain.dao.DomainDao;
@ -55,6 +53,7 @@ import com.cloud.network.NetworkModel;
import com.cloud.network.NetworkRuleApplier;
import com.cloud.network.dao.FirewallRulesCidrsDao;
import com.cloud.network.dao.FirewallRulesDao;
import com.cloud.network.dao.FirewallRulesDcidrsDao;
import com.cloud.network.dao.IPAddressDao;
import com.cloud.network.dao.IPAddressVO;
import com.cloud.network.dao.NetworkDao;

View File

@ -258,7 +258,7 @@ public class NetworkHelperImpl implements NetworkHelper {
@Override
public boolean checkRouterVersion(final VirtualRouter router) {
if (!VirtualNetworkApplianceManagerImpl.routerVersionCheckEnabled.value()) {
if (!VirtualNetworkApplianceManager.RouterVersionCheckEnabled.value()) {
// Router version check is disabled.
return true;
}

View File

@ -45,6 +45,10 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring";
static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval";
static final String RouterHealthChecksConfigRefreshIntervalCK = "router.health.checks.config.refresh.interval";
static final String RouterHealthChecksResultFetchIntervalCK = "router.health.checks.results.fetch.interval";
static final String RouterHealthChecksFailuresToRecreateVrCK = "router.health.checks.failures.to.recreate.vr";
static final ConfigKey<String> RouterTemplateXen = new ConfigKey<String>(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)",
"Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null);
static final ConfigKey<String> RouterTemplateKvm = new ConfigKey<String>(String.class, RouterTemplateKvmCK, "Advanced", "SystemVM Template (KVM)",
@ -63,11 +67,48 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
static final ConfigKey<Integer> RouterAlertsCheckInterval = new ConfigKey<Integer>(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800",
"Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null);
static final ConfigKey<Boolean> routerVersionCheckEnabled = new ConfigKey<Boolean>("Advanced", Boolean.class, "router.version.check", "true",
static final ConfigKey<Boolean> RouterVersionCheckEnabled = new ConfigKey<Boolean>("Advanced", Boolean.class, "router.version.check", "true",
"If true, router minimum required version is checked before sending command", false);
static final ConfigKey<Boolean> UseExternalDnsServers = new ConfigKey<Boolean>(Boolean.class, "use.external.dns", "Advanced", "false",
"Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null);
// Health checks
// Configuration keys for the virtual router health check feature. All are registered under the "Advanced" category.

// Switch for the whole feature: per its description, when false both scheduled checks and on-demand API checks are disabled.
static final ConfigKey<Boolean> RouterHealthChecksEnabled = new ConfigKey<Boolean>(Boolean.class, "router.health.checks.enabled", "Advanced", "true",
"If true, router health checks are allowed to be executed and read. If false, all scheduled checks and API calls for on demand checks are disabled.",
true, ConfigKey.Scope.Global, null);
// Interval, in minutes, for the basic checks; the description states that 0 means no tests are scheduled.
static final ConfigKey<Integer> RouterHealthChecksBasicInterval = new ConfigKey<Integer>(Integer.class, "router.health.checks.basic.interval", "Advanced", "3",
"Interval in minutes at which basic router health checks are performed. If set to 0, no tests are scheduled.",
true, ConfigKey.Scope.Global, null);
// Interval, in minutes, for the advanced checks; the description states that 0 means no tests are scheduled.
static final ConfigKey<Integer> RouterHealthChecksAdvancedInterval = new ConfigKey<Integer>(Integer.class, "router.health.checks.advanced.interval", "Advanced", "10",
"Interval in minutes at which advanced router health checks are performed. If set to 0, no tests are scheduled.",
true, ConfigKey.Scope.Global, null);
// Interval, in minutes, at which the management server pushes the health check configuration to routers.
// The key name is taken from RouterHealthChecksConfigRefreshIntervalCK.
static final ConfigKey<Integer> RouterHealthChecksConfigRefreshInterval = new ConfigKey<Integer>(Integer.class, RouterHealthChecksConfigRefreshIntervalCK, "Advanced", "10",
"Interval in minutes at which router health checks config - such as scheduling intervals, excluded checks, etc is updated on virtual routers by the management server. This value should" +
" be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and results generation for passed data.",
false, ConfigKey.Scope.Global, null);
// Interval, in minutes, at which the management server fetches health check results from routers.
// The key name is taken from RouterHealthChecksResultFetchIntervalCK. The description embeds the name of the
// "failures to recreate VR" key, so a ". " separator is needed before the sentence that follows it.
static final ConfigKey<Integer> RouterHealthChecksResultFetchInterval = new ConfigKey<Integer>(Integer.class, RouterHealthChecksResultFetchIntervalCK, "Advanced", "10",
        "Interval in minutes at which router health checks results are fetched by management server. On each result fetch, management server evaluates need to recreate VR as per configuration of " + RouterHealthChecksFailuresToRecreateVrCK +
        ". This value should be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and fetch.",
        false, ConfigKey.Scope.Global, null);
// Comma separated list of health checks whose failure should cause the router to be recreated; empty by default,
// which per the description means recreation is never attempted. Zone scoped.
// The script folder is written as /root/health_checks/ to agree with the "router.health.checks.to.exclude" description.
static final ConfigKey<String> RouterHealthChecksFailuresToRecreateVr = new ConfigKey<String>(String.class, RouterHealthChecksFailuresToRecreateVrCK, "Advanced", "",
        "Health checks failures defined by this config are the checks that should cause router recreation. If empty the recreate is not attempted for any health check failure. Possible values are comma separated script names " +
        "from systemvms /root/health_checks/ (namely - cpu_usage_check.py, dhcp_check.py, disk_space_check.py, dns_check.py, gateways_check.py, haproxy_check.py, iptables_check.py, memory_usage_check.py, router_version_check.py), connectivity.test " +
        "or services (namely - loadbalancing.service, webserver.service, dhcp.service).",
        true, ConfigKey.Scope.Zone, null);
// Comma separated list of health check scripts to skip during scheduled runs; empty by default. Zone scoped.
static final ConfigKey<String> RouterHealthChecksToExclude = new ConfigKey<String>(String.class, "router.health.checks.to.exclude", "Advanced", "",
"Health checks that should be excluded when executing scheduled checks on the router. This can be a comma separated list of script names placed in the '/root/health_checks/' folder. Currently the following scripts are " +
"placed in default systemvm template - cpu_usage_check.py, disk_space_check.py, gateways_check.py, iptables_check.py, router_version_check.py, dhcp_check.py, dns_check.py, haproxy_check.py, memory_usage_check.py.",
true, ConfigKey.Scope.Zone, null);
// Free disk space threshold in MB; per the description the check fails when free space drops below it. Zone scoped.
static final ConfigKey<Double> RouterHealthChecksFreeDiskSpaceThreshold = new ConfigKey<Double>(Double.class, "router.health.checks.free.disk.space.threshold",
"Advanced", "100", "Free disk space threshold (in MB) on VR below which the check is considered a failure.",
true, ConfigKey.Scope.Zone, null);
// CPU usage threshold in percent; per the description the check fails when usage rises above it. Zone scoped.
static final ConfigKey<Double> RouterHealthChecksMaxCpuUsageThreshold = new ConfigKey<Double>(Double.class, "router.health.checks.max.cpu.usage.threshold",
"Advanced", "100", " Max CPU Usage threshold as % above which check is considered a failure.",
true, ConfigKey.Scope.Zone, null);
// Memory usage threshold in percent; per the description the check fails when usage rises above it. Zone scoped.
static final ConfigKey<Double> RouterHealthChecksMaxMemoryUsageThreshold = new ConfigKey<Double>(Double.class, "router.health.checks.max.memory.usage.threshold",
"Advanced", "100", "Max Memory Usage threshold as % above which check is considered a failure.",
true, ConfigKey.Scope.Zone, null);
public static final int DEFAULT_ROUTER_VM_RAMSIZE = 256; // 256M
public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz
public static final boolean USE_POD_VLAN = false;

View File

@ -17,6 +17,7 @@
package com.cloud.network.router;
import java.lang.reflect.Type;
import java.math.BigInteger;
import java.nio.charset.Charset;
import java.security.MessageDigest;
@ -24,7 +25,9 @@ import java.security.NoSuchAlgorithmException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
@ -42,11 +45,6 @@ import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.log4j.Logger;
import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.apache.cloudstack.alert.AlertService;
import org.apache.cloudstack.alert.AlertService.AlertType;
import org.apache.cloudstack.api.command.admin.router.RebootRouterCmd;
@ -61,11 +59,18 @@ import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO;
import org.apache.cloudstack.lb.ApplicationLoadBalancerRuleVO;
import org.apache.cloudstack.lb.dao.ApplicationLoadBalancerRuleDao;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.network.topology.NetworkTopology;
import org.apache.cloudstack.network.topology.NetworkTopologyContext;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.utils.usage.UsageUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;
import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@ -87,6 +92,9 @@ import com.cloud.agent.api.check.CheckSshCommand;
import com.cloud.agent.api.routing.AggregationControlCommand;
import com.cloud.agent.api.routing.AggregationControlCommand.Action;
import com.cloud.agent.api.routing.GetRouterAlertsCommand;
import com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer;
import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand;
import com.cloud.agent.api.routing.GroupAnswer;
import com.cloud.agent.api.routing.IpAliasTO;
import com.cloud.agent.api.routing.NetworkElementCommand;
import com.cloud.agent.api.routing.SetMonitorServiceCommand;
@ -95,6 +103,10 @@ import com.cloud.agent.manager.Commands;
import com.cloud.alert.AlertManager;
import com.cloud.api.ApiAsyncJobDispatcher;
import com.cloud.api.ApiGsonHelper;
import com.cloud.api.query.dao.DomainRouterJoinDao;
import com.cloud.api.query.dao.UserVmJoinDao;
import com.cloud.api.query.vo.DomainRouterJoinVO;
import com.cloud.api.query.vo.UserVmJoinVO;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.configuration.Config;
@ -109,7 +121,9 @@ import com.cloud.dc.dao.DataCenterDao;
import com.cloud.dc.dao.HostPodDao;
import com.cloud.dc.dao.VlanDao;
import com.cloud.deploy.DeployDestination;
import com.cloud.domain.Domain;
import com.cloud.event.ActionEvent;
import com.cloud.event.ActionEventUtils;
import com.cloud.event.EventTypes;
import com.cloud.exception.AgentUnavailableException;
import com.cloud.exception.ConcurrentOperationException;
@ -135,6 +149,7 @@ import com.cloud.network.NetworkService;
import com.cloud.network.Networks.TrafficType;
import com.cloud.network.PublicIpAddress;
import com.cloud.network.RemoteAccessVpn;
import com.cloud.network.RouterHealthCheckResult;
import com.cloud.network.Site2SiteCustomerGateway;
import com.cloud.network.Site2SiteVpnConnection;
import com.cloud.network.SshKeysDistriMonitor;
@ -144,8 +159,11 @@ import com.cloud.network.addr.PublicIp;
import com.cloud.network.dao.FirewallRulesDao;
import com.cloud.network.dao.IPAddressDao;
import com.cloud.network.dao.IPAddressVO;
import com.cloud.network.dao.LBStickinessPolicyDao;
import com.cloud.network.dao.LBStickinessPolicyVO;
import com.cloud.network.dao.LoadBalancerDao;
import com.cloud.network.dao.LoadBalancerVMMapDao;
import com.cloud.network.dao.LoadBalancerVMMapVO;
import com.cloud.network.dao.LoadBalancerVO;
import com.cloud.network.dao.MonitoringServiceDao;
import com.cloud.network.dao.MonitoringServiceVO;
@ -155,6 +173,8 @@ import com.cloud.network.dao.OpRouterMonitorServiceDao;
import com.cloud.network.dao.OpRouterMonitorServiceVO;
import com.cloud.network.dao.PhysicalNetworkServiceProviderDao;
import com.cloud.network.dao.RemoteAccessVpnDao;
import com.cloud.network.dao.RouterHealthCheckResultDao;
import com.cloud.network.dao.RouterHealthCheckResultVO;
import com.cloud.network.dao.Site2SiteCustomerGatewayDao;
import com.cloud.network.dao.Site2SiteVpnConnectionDao;
import com.cloud.network.dao.Site2SiteVpnConnectionVO;
@ -175,12 +195,14 @@ import com.cloud.network.rules.FirewallRule.Purpose;
import com.cloud.network.rules.FirewallRuleVO;
import com.cloud.network.rules.LoadBalancerContainer.Scheme;
import com.cloud.network.rules.PortForwardingRule;
import com.cloud.network.rules.PortForwardingRuleVO;
import com.cloud.network.rules.RulesManager;
import com.cloud.network.rules.StaticNat;
import com.cloud.network.rules.StaticNatImpl;
import com.cloud.network.rules.StaticNatRule;
import com.cloud.network.rules.dao.PortForwardingRulesDao;
import com.cloud.network.vpc.Vpc;
import com.cloud.network.vpc.VpcService;
import com.cloud.network.vpc.dao.VpcDao;
import com.cloud.network.vpn.Site2SiteVpnManager;
import com.cloud.offering.NetworkOffering;
@ -188,6 +210,7 @@ import com.cloud.offering.ServiceOffering;
import com.cloud.offerings.NetworkOfferingVO;
import com.cloud.offerings.dao.NetworkOfferingDao;
import com.cloud.resource.ResourceManager;
import com.cloud.serializer.GsonHelper;
import com.cloud.server.ConfigurationServer;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.service.dao.ServiceOfferingDao;
@ -214,6 +237,7 @@ import com.cloud.utils.db.EntityManager;
import com.cloud.utils.db.Filter;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.QueryBuilder;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallbackNoReturn;
@ -243,6 +267,8 @@ import com.cloud.vm.dao.NicIpAliasVO;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.UserVmDetailsDao;
import com.cloud.vm.dao.VMInstanceDao;
import com.google.gson.JsonSyntaxException;
import com.google.gson.reflect.TypeToken;
/**
* VirtualNetworkApplianceManagerImpl manages the different types of virtual
@ -251,6 +277,7 @@ import com.cloud.vm.dao.VMInstanceDao;
public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements VirtualNetworkApplianceManager, VirtualNetworkApplianceService, VirtualMachineGuru, Listener,
Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualMachine> {
private static final Logger s_logger = Logger.getLogger(VirtualNetworkApplianceManagerImpl.class);
private static final String CONNECTIVITY_TEST = "connectivity.test";
@Inject private EntityManager _entityMgr;
@Inject private DataCenterDao _dcDao;
@ -272,12 +299,12 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
@Inject private AccountManager _accountMgr;
@Inject private ConfigurationManager _configMgr;
@Inject private ConfigurationServer _configServer;
@Inject private ServiceOfferingDao _serviceOfferingDao;
@Inject protected ServiceOfferingDao _serviceOfferingDao;
@Inject private UserVmDao _userVmDao;
@Inject private VMInstanceDao _vmDao;
@Inject private NetworkOfferingDao _networkOfferingDao;
@Inject private GuestOSDao _guestOSDao;
@Inject private NetworkOrchestrationService _networkMgr;
@Inject protected NetworkOrchestrationService _networkMgr;
@Inject protected NetworkModel _networkModel;
@Inject protected VirtualMachineManager _itMgr;
@Inject private VpnUserDao _vpnUsersDao;
@ -303,7 +330,7 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
@Inject private NetworkService _networkSvc;
@Inject private IpAddressManager _ipAddrMgr;
@Inject private ConfigDepot _configDepot;
@Inject private MonitoringServiceDao _monitorServiceDao;
@Inject protected MonitoringServiceDao _monitorServiceDao;
@Inject private AsyncJobManager _asyncMgr;
@Inject protected VpcDao _vpcDao;
@Inject protected ApiAsyncJobDispatcher _asyncDispatcher;
@ -311,6 +338,16 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
@Inject protected NetworkTopologyContext _networkTopologyContext;
@Inject private UserVmJoinDao userVmJoinDao;
@Inject private DomainRouterJoinDao domainRouterJoinDao;
@Inject private PortForwardingRulesDao portForwardingDao;
@Inject private ApplicationLoadBalancerRuleDao applicationLoadBalancerRuleDao;
@Inject private RouterHealthCheckResultDao routerHealthCheckResultDao;
@Inject private LBStickinessPolicyDao lbStickinessPolicyDao;
@Inject private NetworkService networkService;
@Inject private VpcService vpcService;
@Autowired
@Qualifier("networkHelper")
protected NetworkHelper _nwHelper;
@ -496,12 +533,6 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
}
}
static final ConfigKey<Boolean> UseExternalDnsServers = new ConfigKey<Boolean>(Boolean.class, "use.external.dns", "Advanced", "false",
"Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null);
static final ConfigKey<Boolean> routerVersionCheckEnabled = new ConfigKey<Boolean>("Advanced", Boolean.class, "router.version.check", "true",
"If true, router minimum required version is checked before sending command", false);
@Override
public boolean configure(final String name, final Map<String, Object> params) throws ConfigurationException {
@ -658,7 +689,21 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
if (routerAlertsCheckInterval > 0) {
_checkExecutor.scheduleAtFixedRate(new CheckRouterAlertsTask(), routerAlertsCheckInterval, routerAlertsCheckInterval, TimeUnit.SECONDS);
} else {
s_logger.debug("router.alerts.check.interval - " + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread");
s_logger.debug(RouterAlertsCheckIntervalCK + "=" + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread");
}
final int routerHealthCheckConfigRefreshInterval = RouterHealthChecksConfigRefreshInterval.value();
if (routerHealthCheckConfigRefreshInterval > 0) {
_checkExecutor.scheduleAtFixedRate(new UpdateRouterHealthChecksConfigTask(), routerHealthCheckConfigRefreshInterval, routerHealthCheckConfigRefreshInterval, TimeUnit.MINUTES);
} else {
s_logger.debug(RouterHealthChecksConfigRefreshIntervalCK + "=" + routerHealthCheckConfigRefreshInterval + " so not scheduling the router health check data thread");
}
final int routerHealthChecksFetchInterval = RouterHealthChecksResultFetchInterval.value();
if (routerHealthChecksFetchInterval > 0) {
_checkExecutor.scheduleAtFixedRate(new FetchRouterHealthChecksResultTask(), routerHealthChecksFetchInterval, routerHealthChecksFetchInterval, TimeUnit.MINUTES);
} else {
s_logger.debug(RouterHealthChecksResultFetchIntervalCK + "=" + routerHealthChecksFetchInterval + " so not scheduling the router checks fetching thread");
}
return true;
@ -1186,6 +1231,599 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
}
}
protected class FetchRouterHealthChecksResultTask extends ManagedContextRunnable {
public FetchRouterHealthChecksResultTask() {
}
@Override
protected void runInContext() {
try {
final List<DomainRouterVO> routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId);
s_logger.info("Found " + routers.size() + " running routers. Fetching, analysing and updating DB for the health checks.");
if (!RouterHealthChecksEnabled.value()) {
s_logger.debug("Skipping fetching of router health check results as router.health.checks.enabled is disabled");
return;
}
for (final DomainRouterVO router : routers) {
GetRouterMonitorResultsAnswer answer = fetchAndUpdateRouterHealthChecks(router, false);
List<String> failingChecks = getFailingChecks(router, answer);
handleFailingChecks(router, failingChecks);
}
} catch (final Exception ex) {
s_logger.error("Fail to complete the FetchRouterHealthChecksResultTask! ", ex);
ex.printStackTrace();
}
}
private List<String> getFailingChecks(DomainRouterVO router, GetRouterMonitorResultsAnswer answer) {
if (answer == null) {
s_logger.warn("Unable to fetch monitor results for router " + router);
resetRouterHealthChecksAndConnectivity(router.getId(), false, "Communication failed");
return Arrays.asList(CONNECTIVITY_TEST);
} else if (!answer.getResult()) {
s_logger.warn("Failed to fetch monitor results from router " + router + " with details: " + answer.getDetails());
resetRouterHealthChecksAndConnectivity(router.getId(), false, "Failed to fetch results with details: " + answer.getDetails());
return Arrays.asList(CONNECTIVITY_TEST);
} else {
resetRouterHealthChecksAndConnectivity(router.getId(), true, "Successfully fetched data");
updateDbHealthChecksFromRouterResponse(router.getId(), answer.getMonitoringResults());
return answer.getFailingChecks();
}
}
private void handleFailingChecks(DomainRouterVO router, List<String> failingChecks) {
if (failingChecks == null || failingChecks.size() == 0) {
return;
}
String alertMessage = "Health checks failed: " + failingChecks.size() + " failing checks on router " + router.getUuid();
_alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(),
alertMessage, alertMessage);
s_logger.warn(alertMessage + ". Checking failed health checks to see if router needs recreate");
String checkFailsToRecreateVr = RouterHealthChecksFailuresToRecreateVr.valueIn(router.getDataCenterId());
StringBuilder failingChecksEvent = new StringBuilder();
boolean recreateRouter = false;
for (int i = 0; i < failingChecks.size(); i++) {
String failedCheck = failingChecks.get(i);
if (i == 0) {
failingChecksEvent.append("Router ")
.append(router.getUuid())
.append(" has failing checks: ");
}
failingChecksEvent.append(failedCheck);
if (i < failingChecks.size() - 1) {
failingChecksEvent.append(", ");
}
if (StringUtils.isNotBlank(checkFailsToRecreateVr) && checkFailsToRecreateVr.contains(failedCheck)) {
recreateRouter = true;
}
}
ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,
Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, failingChecksEvent.toString());
if (recreateRouter) {
s_logger.warn("Health Check Alert: Found failing checks in " +
RouterHealthChecksFailuresToRecreateVrCK + ", attempting recreating router.");
recreateRouter(router.getId());
}
}
}
/**
 * Looks through the join rows of the given router for one that is attached to a VPC.
 *
 * @param routerId id of the router whose join rows are searched
 * @return the first row that is not removed and has a non-zero VPC id, or null when there is none
 */
private DomainRouterJoinVO getAnyRouterJoinWithVpc(long routerId) {
    for (DomainRouterJoinVO candidate : domainRouterJoinDao.searchByIds(routerId)) {
        // Skip rows that are already removed or that carry no VPC.
        if (candidate.getRemoved() != null || candidate.getVpcId() == 0) {
            continue;
        }
        return candidate;
    }
    return null;
}
/**
 * Restarts the VPC of the given router join row in order to recreate the router, publishing a
 * router health check event beforehand.
 *
 * @param router join row identifying the router and its VPC
 * @param user user on whose behalf the restart is requested
 * @return the result reported by the VPC service, or false when the restart threw an exception
 */
private boolean restartVpcInDomainRouter(DomainRouterJoinVO router, User user) {
    boolean restarted;
    try {
        s_logger.debug("Attempting restart VPC " + router.getVpcName() + " for router recreation " + router.getUuid());

        final String eventDescription = "Recreating router " + router.getUuid() + " by restarting VPC " + router.getVpcUuid();
        ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,
                Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, eventDescription);

        restarted = vpcService.restartVpc(router.getVpcId(), true, false, user);
    } catch (Exception e) {
        s_logger.error("Failed to restart VPC for router recreation " +
                router.getVpcName() + " ,router " + router.getUuid(), e);
        restarted = false;
    }
    return restarted;
}
/**
 * Looks through the join rows of the given router for one that carries guest traffic.
 *
 * @param routerId id of the router whose join rows are searched
 * @return the first row that is not removed and has traffic type Guest, or null when there is none
 */
private DomainRouterJoinVO getAnyRouterJoinWithGuestTraffic(long routerId) {
    for (DomainRouterJoinVO candidate : domainRouterJoinDao.searchByIds(routerId)) {
        // Skip rows that are already removed or that belong to a non-guest network.
        if (candidate.getRemoved() != null || candidate.getTrafficType() != TrafficType.Guest) {
            continue;
        }
        return candidate;
    }
    return null;
}
/**
 * Restarts the guest network of the given router join row in order to recreate the router,
 * publishing a router health check event beforehand.
 *
 * @param router join row identifying the router and its guest network
 * @param user user on whose behalf the restart is requested
 * @return the result reported by the network service, or false when the restart threw an exception
 */
private boolean restartGuestNetworkInDomainRouter(DomainRouterJoinVO router, User user) {
    try {
        s_logger.info("Attempting restart network " + router.getNetworkName() + " for router recreation " + router.getUuid());
        ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,
                Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS,
                "Recreating router " + router.getUuid() + " by restarting network " + router.getNetworkUuid());
        return networkService.restartNetwork(router.getNetworkId(), true, false, user);
    } catch (Exception e) {
        // Report the router uuid (not the network name a second time) so the failure can be traced to the router.
        s_logger.error("Failed to restart network " + router.getNetworkName() +
                " for router recreation " + router.getUuid(), e);
        return false;
    }
}
/**
 * Tries to recreate a router by restarting, with cleanup, the VPC it belongs to or, when the
 * router has no VPC, a guest network it serves.
 *
 * @param routerId id of the router to recreate
 * @return the result of the attempted restart, or false when neither a VPC nor a guest network
 *         could be found for the router
 */
private boolean recreateRouter(long routerId) {
    final User systemUser = _userDao.getUser(User.UID_SYSTEM);

    // A VPC takes precedence: restart it as soon as one is found.
    final DomainRouterJoinVO vpcJoin = getAnyRouterJoinWithVpc(routerId);
    if (vpcJoin != null) {
        return restartVpcInDomainRouter(vpcJoin, systemUser);
    }

    // No VPC, so fall back to a guest network served by the router.
    final DomainRouterJoinVO guestJoin = getAnyRouterJoinWithGuestTraffic(routerId);
    if (guestJoin == null) {
        s_logger.warn("Unable to find a valid guest network or VPC to restart for recreating router id " + routerId);
        return false;
    }
    return restartGuestNetworkInDomainRouter(guestJoin, systemUser);
}
/**
 * Loads the stored health check results of a router and indexes them by check type and then by
 * check name.
 *
 * @param routerId id of the router whose results are loaded
 * @return map of check type to (check name to stored result); empty when nothing is stored
 */
private Map<String, Map<String, RouterHealthCheckResultVO>> getHealthChecksFromDb(long routerId) {
    final Map<String, Map<String, RouterHealthCheckResultVO>> byTypeThenName = new HashMap<>();
    for (RouterHealthCheckResultVO row : routerHealthCheckResultDao.getHealthCheckResults(routerId)) {
        Map<String, RouterHealthCheckResultVO> byName = byTypeThenName.get(row.getCheckType());
        if (byName == null) {
            // First result seen for this check type: open its bucket.
            byName = new HashMap<>();
            byTypeThenName.put(row.getCheckType(), byName);
        }
        byName.put(row.getCheckName(), row);
    }
    return byTypeThenName;
}
/**
 * Expunges the stored health checks of a router and then writes the "basic" connectivity test
 * result for it, creating the row when none is found and updating it otherwise.
 *
 * @param routerId id of the router
 * @param connected result to store for the connectivity test
 * @param message details to store with the result; ignored when null or empty
 * @return the connectivity test row as read back from the DAO after the write
 */
private RouterHealthCheckResultVO resetRouterHealthChecksAndConnectivity(final long routerId, boolean connected, String message) {
    routerHealthCheckResultDao.expungeHealthChecks(routerId);

    final RouterHealthCheckResultVO stored = routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic");
    final boolean createRow = stored == null;
    final RouterHealthCheckResultVO connectivity = createRow
            ? new RouterHealthCheckResultVO(routerId, CONNECTIVITY_TEST, "basic")
            : stored;

    connectivity.setCheckResult(connected);
    connectivity.setLastUpdateTime(new Date());
    if (StringUtils.isNotEmpty(message)) {
        connectivity.setCheckDetails(message.getBytes(com.cloud.utils.StringUtils.getPreferredCharset()));
    }

    if (createRow) {
        routerHealthCheckResultDao.persist(connectivity);
    } else {
        routerHealthCheckResultDao.update(connectivity.getId(), connectivity);
    }

    // Re-read so the caller gets the row as the DAO now sees it.
    return routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic");
}
/**
 * Converts the data of a single health check into a result row and saves it: an existing row from
 * {@code checksInDb} is updated, otherwise a new row is persisted.
 *
 * @param routerId id of the router the check ran on
 * @param checkName name of the check
 * @param checkType type of the check
 * @param checkData values of the check; the keys read are "success", "lastUpdate" (parsed as a
 *                  long and used as a Date timestamp), "lastRunDuration" (parsed as a double) and
 *                  "message"
 * @param checksInDb rows already stored for the router, indexed by check type and check name
 * @return the saved row
 */
private RouterHealthCheckResultVO parseHealthCheckVOFromJson(final long routerId,
        final String checkName, final String checkType, final Map<String, String> checkData,
        final Map<String, Map<String, RouterHealthCheckResultVO>> checksInDb) {
    // Parse everything first so malformed data fails before anything is written.
    final boolean passed = Boolean.parseBoolean(checkData.get("success"));
    final Date updatedAt = new Date(Long.parseLong(checkData.get("lastUpdate")));
    final double runDuration = Double.parseDouble(checkData.get("lastRunDuration"));
    final String details = checkData.get("message");

    final boolean alreadyStored = checksInDb.containsKey(checkType) && checksInDb.get(checkType).containsKey(checkName);
    final RouterHealthCheckResultVO row = alreadyStored
            ? checksInDb.get(checkType).get(checkName)
            : new RouterHealthCheckResultVO(routerId, checkName, checkType);

    row.setCheckResult(passed);
    row.setLastUpdateTime(updatedAt);
    if (StringUtils.isNotEmpty(details)) {
        row.setCheckDetails(details.getBytes(com.cloud.utils.StringUtils.getPreferredCharset()));
    }

    if (alreadyStored) {
        routerHealthCheckResultDao.update(row.getId(), row);
    } else {
        routerHealthCheckResultDao.persist(row);
    }

    s_logger.info("Found health check " + row + " which took running duration (ms) " + runDuration);
    return row;
}
/**
 * Converts the parsed monitoring results of a router into health check results, saving each one.
 * A check whose data cannot be parsed is logged and skipped.
 *
 * @param checksJson results indexed by check type and then check name:
 * <pre>
 * {
 *   checkType1: {
 *     lastRun: { start: ..., end: ..., duration: ... },   (optional, only logged)
 *     checkName1: {
 *       success: true/false,
 *       lastUpdate: timestamp parsed as a long,
 *       lastRunDuration: ms spent on test,
 *       message: detailed message from check execution
 *     },
 *     checkName2: { ... }
 *   },
 *   checkType2: { ... }
 * }
 * </pre>
 * @param routerId id of the router the results belong to
 * @return the successfully parsed and saved results
 */
private List<RouterHealthCheckResult> parseHealthCheckResults(
        final Map<String, Map<String, Map<String, String>>> checksJson, final long routerId) {
    final Map<String, Map<String, RouterHealthCheckResultVO>> storedChecks = getHealthChecksFromDb(routerId);
    final List<RouterHealthCheckResult> parsed = new ArrayList<>();
    final String lastRunKey = "lastRun";

    for (Map.Entry<String, Map<String, Map<String, String>>> typeEntry : checksJson.entrySet()) {
        final String checkType = typeEntry.getKey();
        final Map<String, Map<String, String>> checksOfType = typeEntry.getValue();

        // The "lastRun" entry describes the run of the whole check type; it is logged, not stored.
        if (checksOfType.containsKey(lastRunKey)) {
            final Map<String, String> lastRun = checksOfType.get(lastRunKey);
            s_logger.info("Found check types executed on VR " + checkType + ", start: " + lastRun.get("start") +
                    ", end: " + lastRun.get("end") + ", duration: " + lastRun.get("duration"));
        }

        for (Map.Entry<String, Map<String, String>> checkEntry : checksOfType.entrySet()) {
            final String checkName = checkEntry.getKey();
            if (!lastRunKey.equals(checkName)) {
                try {
                    parsed.add(parseHealthCheckVOFromJson(routerId, checkName, checkType, checkEntry.getValue(), storedChecks));
                } catch (Exception ex) {
                    s_logger.error("Skipping health check: Exception while parsing check result data for router id " + routerId +
                            ", check type: " + checkType + ", check name: " + checkName + ":" + ex.getLocalizedMessage(), ex);
                }
            }
        }
    }
    return parsed;
}
/**
 * Parses the monitoring results string returned by a router as JSON and saves the contained
 * health check results.
 *
 * @param routerId id of the router the results belong to
 * @param monitoringResult JSON text with the monitoring results
 * @return the saved results; an empty list when the text is blank or is not valid JSON
 */
private List<RouterHealthCheckResult> updateDbHealthChecksFromRouterResponse(final long routerId, final String monitoringResult) {
    if (StringUtils.isBlank(monitoringResult)) {
        s_logger.warn("Attempted parsing empty monitoring results string for router " + routerId);
        return Collections.emptyList();
    }

    try {
        s_logger.debug("Parsing and updating DB health check data for router: " + routerId + " with data: " + monitoringResult);
        // Target type: check type -> check name -> attribute -> value.
        final Type nestedMapType = new TypeToken<Map<String, Map<String, Map<String, String>>>>() {}.getType();
        final Map<String, Map<String, Map<String, String>>> parsedChecks = GsonHelper.getGson().fromJson(monitoringResult, nestedMapType);
        return parseHealthCheckResults(parsedChecks, routerId);
    } catch (JsonSyntaxException ex) {
        s_logger.error("Unable to parse the result of health checks due to " + ex.getLocalizedMessage(), ex);
        return Collections.emptyList();
    }
}
/**
 * Asks the router, through the agent on its host, for its health check (monitor) results. This
 * method only fetches the answer; storing the results is left to the caller.
 *
 * @param router the router to query
 * @param performFreshChecks flag passed on to the command sent to the router
 * @return the answer from the router; a failed answer carrying the received details when the
 *         agent replied with a different answer type; or null when health checks are disabled,
 *         the router has no usable control IP, no answer was received or sending threw an exception
 */
private GetRouterMonitorResultsAnswer fetchAndUpdateRouterHealthChecks(DomainRouterVO router, boolean performFreshChecks) {
    if (!RouterHealthChecksEnabled.value()) {
        return null;
    }

    String controlIP = getRouterControlIP(router);
    if (StringUtils.isBlank(controlIP) || controlIP.equals("0.0.0.0")) {
        // Without a usable control IP the router cannot be reached.
        return null;
    }

    final GetRouterMonitorResultsCommand command = new GetRouterMonitorResultsCommand(performFreshChecks);
    command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlIP);
    command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());

    try {
        final Answer answer = _agentMgr.easySend(router.getHostId(), command);
        if (answer == null) {
            s_logger.warn("Unable to fetch monitoring results data from router " + router.getHostName());
            return null;
        }
        if (answer instanceof GetRouterMonitorResultsAnswer) {
            return (GetRouterMonitorResultsAnswer) answer;
        }
        // Any other answer type is turned into a failed monitor answer that keeps the details.
        s_logger.warn("Unable to fetch health checks results from router " + router.getHostName() + " Received answer " + answer.getDetails());
        return new GetRouterMonitorResultsAnswer(command, false, null, answer.getDetails());
    } catch (final Exception e) {
        s_logger.warn("Error while fetching health checks results from router: " + router.getInstanceName(), e);
        return null;
    }
}
/**
 * Runs the health checks of a router on demand: pushes the health check config to the router,
 * fetches the results (fresh ones when the config push succeeded, the last ones otherwise) and
 * stores them in the database.
 *
 * @param routerId id of the router to check
 * @return true only when both the config push and the result fetch succeeded
 * @throws CloudRuntimeException when the router does not exist or health checks are disabled
 */
@Override
public boolean performRouterHealthChecks(long routerId) {
    final DomainRouterVO router = _routerDao.findById(routerId);
    if (router == null) {
        throw new CloudRuntimeException("Unable to find router with id " + routerId);
    }
    if (!RouterHealthChecksEnabled.value()) {
        throw new CloudRuntimeException("Router health checks are not enabled for router: " + router);
    }

    s_logger.info("Running health check results for router " + router.getUuid());

    // Step 1: push the config; a fresh run is requested only when the push worked.
    final boolean configPushed = updateRouterHealthChecksConfig(router);
    if (configPushed) {
        s_logger.info("Successfully updated health check config for fresh run successfully for router: " + router);
    } else {
        s_logger.warn("Unable to update health check config for fresh run successfully for router: " + router + ", so trying to fetch last result.");
    }
    final GetRouterMonitorResultsAnswer answer = fetchAndUpdateRouterHealthChecks(router, configPushed);

    // Step 2: store the outcome, regardless of whether the config push worked.
    if (answer != null && answer.getResult()) {
        resetRouterHealthChecksAndConnectivity(routerId, true, "Successfully fetched data");
        updateDbHealthChecksFromRouterResponse(routerId, answer.getMonitoringResults());
        return configPushed;
    }

    final String failureDetails = answer == null
            ? "Communication failed "
            : "Failed to fetch results with details: " + answer.getDetails();
    resetRouterHealthChecksAndConnectivity(routerId, false, failureDetails);
    return false;
}
/**
 * Periodic task that pushes the health check config to every running router handled by this
 * management server. Any exception ends the current cycle and is logged.
 */
protected class UpdateRouterHealthChecksConfigTask extends ManagedContextRunnable {
    public UpdateRouterHealthChecksConfigTask() {
    }

    @Override
    protected void runInContext() {
        try {
            final List<DomainRouterVO> runningRouters = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId);
            s_logger.debug("Found " + runningRouters.size() + " running routers. ");

            final Iterator<DomainRouterVO> it = runningRouters.iterator();
            while (it.hasNext()) {
                updateRouterHealthChecksConfig(it.next());
            }
        } catch (final Exception ex) {
            s_logger.error("Fail to complete the UpdateRouterHealthChecksConfigTask! ", ex);
        }
    }
}
/**
 * Builds the command that carries the monitored services and the health check settings and
 * config to a router.
 *
 * @param router the router the command is addressed to
 * @param services services to pass to the command constructor
 * @param reconfigure value for the command's reconfigure-after-update flag
 * @param deleteFromProcessedCache value for the command's delete-from-processed-cache flag
 * @return the populated command
 */
private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO router, List<MonitorServiceTO> services,
        boolean reconfigure, boolean deleteFromProcessedCache) {
    final SetMonitorServiceCommand cmd = new SetMonitorServiceCommand(services);

    // Addressing details of the router.
    final String routerIp = getRouterControlIP(router);
    cmd.setAccessDetail(NetworkElementCommand.ROUTER_IP, routerIp);
    final String routerName = router.getInstanceName();
    cmd.setAccessDetail(NetworkElementCommand.ROUTER_NAME, routerName);

    // Health check settings taken from the configuration keys.
    final String enabled = RouterHealthChecksEnabled.value().toString();
    cmd.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED, enabled);
    final String basicInterval = RouterHealthChecksBasicInterval.value().toString();
    cmd.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, basicInterval);
    final String advancedInterval = RouterHealthChecksAdvancedInterval.value().toString();
    cmd.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, advancedInterval);
    cmd.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED, RouterHealthChecksToExclude.valueIn(router.getDataCenterId()));

    cmd.setHealthChecksConfig(getRouterHealthChecksConfig(router));
    cmd.setReconfigureAfterUpdate(reconfigure);
    cmd.setDeleteFromProcessedCache(deleteFromProcessedCache);
    return cmd;
}
/**
 * Sends the current health check config to the given virtual router.
 *
 * @param router the router the config is sent to
 * @return true when the router acknowledged the update with a successful answer; false when
 *         health checks are disabled, the router has no usable control IP, sending failed or the
 *         answer was missing, of an unexpected type or unsuccessful
 */
private boolean updateRouterHealthChecksConfig(DomainRouterVO router) {
    if (!RouterHealthChecksEnabled.value()) {
        return false;
    }

    // Validate the control IP before building the command: assembling the command collects the
    // health check config through several DAO lookups, which is wasted work for an unreachable router.
    String controlIP = getRouterControlIP(router);
    if (StringUtils.isBlank(controlIP) || controlIP.equals("0.0.0.0")) {
        s_logger.debug("Skipping update data on router " + router.getUuid() + " because controlIp is not correct.");
        return false;
    }

    SetMonitorServiceCommand command = createMonitorServiceCommand(router, null, true, true);

    s_logger.info("Updating data for router health checks for router " + router.getUuid());
    Answer origAnswer;
    try {
        origAnswer = _agentMgr.easySend(router.getHostId(), command);
    } catch (final Exception e) {
        s_logger.error("Error while sending update data for health check to router: " + router.getInstanceName(), e);
        return false;
    }

    if (origAnswer == null) {
        s_logger.error("Unable to update health checks data to router " + router.getHostName());
        return false;
    }

    if (!(origAnswer instanceof GroupAnswer)) {
        s_logger.error("Unable to update health checks data to router " + router.getHostName() + " Received answer " + origAnswer.getDetails());
        return false;
    }

    GroupAnswer answer = (GroupAnswer) origAnswer;
    if (!answer.getResult()) {
        s_logger.error("Unable to update health checks data to router " + router.getHostName() + ", details : " + answer.getDetails());
    }
    return answer.getResult();
}
/**
 * Formats the system threshold settings of the router's zone as
 * {@code minDiskNeeded=..,maxCpuUsage=..,maxMemoryUsage=..;}.
 *
 * @param router the router whose zone settings are read
 * @return the formatted threshold string
 */
private String getSystemThresholdsHealthChecksData(final DomainRouterVO router) {
    final String minDisk = "minDiskNeeded=" + RouterHealthChecksFreeDiskSpaceThreshold.valueIn(router.getDataCenterId());
    final String maxCpu = ",maxCpuUsage=" + RouterHealthChecksMaxCpuUsageThreshold.valueIn(router.getDataCenterId());
    final String maxMemory = ",maxMemoryUsage=" + RouterHealthChecksMaxMemoryUsageThreshold.valueIn(router.getDataCenterId()) + ";";
    return minDisk + maxCpu + maxMemory;
}
/**
 * Formats the template and scripts version of a router as
 * {@code templateVersion=..,scriptsVersion=..}.
 *
 * @param router the router whose versions are read
 * @return the formatted versions, or null when either version is missing
 */
private String getRouterVersionHealthChecksData(final DomainRouterVO router) {
    if (router.getTemplateVersion() == null || router.getScriptsVersion() == null) {
        return null;
    }
    return "templateVersion=" + router.getTemplateVersion()
            + ",scriptsVersion=" + router.getScriptsVersion();
}
/**
 * Appends one {@code sourceIp=..,sourcePortStart=..,...;} entry to {@code portData} for every
 * port forwarding rule that targets the given VM on the network of the given router join row.
 *
 * @param routerJoinVO join row providing the network id
 * @param vm VM whose rules are looked up
 * @param portData builder the entries are appended to
 */
private void updateWithPortForwardingRules(final DomainRouterJoinVO routerJoinVO, final UserVmJoinVO vm, final StringBuilder portData) {
    final SearchBuilder<PortForwardingRuleVO> ruleSearch = portForwardingDao.createSearchBuilder();
    ruleSearch.and("networkId", ruleSearch.entity().getNetworkId(), SearchCriteria.Op.EQ);
    ruleSearch.and("instanceId", ruleSearch.entity().getVirtualMachineId(), SearchCriteria.Op.EQ);

    final SearchCriteria<PortForwardingRuleVO> criteria = ruleSearch.create();
    criteria.setParameters("networkId", routerJoinVO.getNetworkId());
    criteria.setParameters("instanceId", vm.getId());

    for (PortForwardingRuleVO rule : portForwardingDao.search(criteria, null)) {
        portData.append("sourceIp=");
        portData.append(_ipAddressDao.findById(rule.getSourceIpAddressId()).getAddress().toString());
        portData.append(",sourcePortStart=");
        portData.append(rule.getSourcePortStart());
        portData.append(",sourcePortEnd=");
        portData.append(rule.getSourcePortEnd());
        portData.append(",destIp=");
        portData.append(rule.getDestinationIpAddress());
        portData.append(",destPortStart=");
        portData.append(rule.getDestinationPortStart());
        portData.append(",destPortEnd=");
        portData.append(rule.getDestinationPortEnd());
        portData.append(";");
    }
}
/**
 * Lists the method names of the stickiness policies of a load balancing rule.
 *
 * @param loadBalancingRuleId id of the load balancing rule
 * @return the method names separated by single spaces (trimmed), or "None" when the rule has no
 *         stickiness policy
 */
private String getStickinessPolicies(long loadBalancingRuleId) {
    final List<LBStickinessPolicyVO> policies = lbStickinessPolicyDao.listByLoadBalancerId(loadBalancingRuleId, false);
    if (policies == null || policies.isEmpty()) {
        return "None";
    }

    final StringBuilder methodNames = new StringBuilder();
    for (LBStickinessPolicyVO policy : policies) {
        methodNames.append(policy.getMethodName());
        methodNames.append(" ");
    }
    return methodNames.toString().trim();
}
/**
 * Appends one {@code maxconn=..,sourcePortStart=..,...,vmIps=ip1 ip2;} entry to
 * {@code loadBalancingData} for every load balancer rule of the router's network that has at
 * least one VM mapped to it.
 *
 * @param routerJoinVO join row providing the network and the router role
 * @param loadBalancingData builder the entries are appended to
 */
private void updateWithLbRules(final DomainRouterJoinVO routerJoinVO, final StringBuilder loadBalancingData) {
    final List<? extends FirewallRuleVO> lbRules = this.getLBRules(routerJoinVO);
    for (FirewallRuleVO rule : lbRules) {
        final List<LoadBalancerVMMapVO> vmMappings = _loadBalancerVMMapDao.listByLoadBalancerId(rule.getId(), false);
        if (vmMappings.size() <= 0) {
            // Rules without mapped VMs produce no entry.
            continue;
        }

        final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(routerJoinVO.getNetworkId()).getNetworkOfferingId());

        // maxconn comes from the offering when set there, otherwise from the global setting.
        loadBalancingData.append("maxconn=");
        if (offering.getConcurrentConnections() != null) {
            loadBalancingData.append(offering.getConcurrentConnections().toString());
        } else {
            loadBalancingData.append(_configDao.getValue(Config.NetworkLBHaproxyMaxConn.key()));
        }

        loadBalancingData.append(",sourcePortStart=");
        loadBalancingData.append(rule.getSourcePortStart());
        loadBalancingData.append(",sourcePortEnd=");
        loadBalancingData.append(rule.getSourcePortEnd());

        if (rule instanceof LoadBalancerVO) {
            final LoadBalancerVO publicLb = (LoadBalancerVO) rule;
            loadBalancingData.append(",sourceIp=");
            loadBalancingData.append(_ipAddressDao.findById(publicLb.getSourceIpAddressId()).getAddress().toString());
            loadBalancingData.append(",destPortStart=").append(publicLb.getDefaultPortStart());
            loadBalancingData.append(",destPortEnd=").append(publicLb.getDefaultPortEnd());
            loadBalancingData.append(",algorithm=").append(publicLb.getAlgorithm());
            loadBalancingData.append(",protocol=").append(publicLb.getLbProtocol());
        } else if (rule instanceof ApplicationLoadBalancerRuleVO) {
            final ApplicationLoadBalancerRuleVO internalLb = (ApplicationLoadBalancerRuleVO) rule;
            loadBalancingData.append(",sourceIp=");
            loadBalancingData.append(internalLb.getSourceIp());
            loadBalancingData.append(",destPortStart=").append(internalLb.getDefaultPortStart());
            loadBalancingData.append(",destPortEnd=").append(internalLb.getDefaultPortEnd());
            loadBalancingData.append(",algorithm=").append(internalLb.getAlgorithm());
            loadBalancingData.append(",protocol=").append(internalLb.getLbProtocol());
        }

        loadBalancingData.append(",stickiness=");
        loadBalancingData.append(getStickinessPolicies(rule.getId()));
        loadBalancingData.append(",keepAliveEnabled=");
        loadBalancingData.append(offering.isKeepAliveEnabled());
        loadBalancingData.append(",vmIps=");
        for (LoadBalancerVMMapVO mapping : vmMappings) {
            loadBalancingData.append(mapping.getInstanceIp()).append(" ");
        }
        // Turn the space after the last IP into the entry terminator.
        final int lastIndex = loadBalancingData.length() - 1;
        loadBalancingData.setCharAt(lastIndex, ';');
    }
}
/**
 * Collects the data the router needs for its health checks: running VMs, gateways, port
 * forwarding rules, load balancer rules, system thresholds and, when known, the router version.
 *
 * @param router the router the config is built for
 * @return map with the keys "virtualMachines", "gateways", "portForwarding", "haproxyData",
 *         "systemThresholds" and, when both versions are known, "routerVersion"
 */
private Map<String, String> getRouterHealthChecksConfig(final DomainRouterVO router) {
    final Map<String, String> config = new HashMap<>();
    final List<DomainRouterJoinVO> joinRows = domainRouterJoinDao.searchByIds(router.getId());

    final StringBuilder vmEntries = new StringBuilder();
    final StringBuilder portForwardingEntries = new StringBuilder();
    final StringBuilder lbEntries = new StringBuilder();
    final StringBuilder gatewayIps = new StringBuilder("gatewaysIps=");

    for (DomainRouterJoinVO joinRow : joinRows) {
        if (StringUtils.isNotBlank(joinRow.getGateway())) {
            gatewayIps.append(joinRow.getGateway()).append(" ");
        }

        // All user VMs on the network of this join row.
        final SearchBuilder<UserVmJoinVO> vmSearch = userVmJoinDao.createSearchBuilder();
        vmSearch.and("networkId", vmSearch.entity().getNetworkId(), SearchCriteria.Op.EQ);
        final SearchCriteria<UserVmJoinVO> vmCriteria = vmSearch.create();
        vmCriteria.setParameters("networkId", joinRow.getNetworkId());

        for (UserVmJoinVO userVm : userVmJoinDao.search(vmCriteria, null)) {
            // Only running VMs are reported.
            if (userVm.getState() == VirtualMachine.State.Running) {
                vmEntries.append("vmName=").append(userVm.getName());
                vmEntries.append(",macAddress=").append(userVm.getMacAddress());
                vmEntries.append(",ip=").append(userVm.getIpAddress());
                vmEntries.append(";");
                updateWithPortForwardingRules(joinRow, userVm, portForwardingEntries);
            }
        }

        updateWithLbRules(joinRow, lbEntries);
    }

    final String versionData = getRouterVersionHealthChecksData(router);
    config.put("virtualMachines", vmEntries.toString());
    config.put("gateways", gatewayIps.toString());
    config.put("portForwarding", portForwardingEntries.toString());
    config.put("haproxyData", lbEntries.toString());
    config.put("systemThresholds", getSystemThresholdsHealthChecksData(router));
    if (versionData != null) {
        config.put("routerVersion", versionData);
    }
    return config;
}
/**
 * Finds the load balancer rules of the router's network, picking the rule table by router role:
 * load balancer rules with a source IP address id for a virtual router, application load balancer
 * rules with a source IP for an internal LB VM.
 *
 * @param router join row providing the role and the network id
 * @return the matching rules, or an empty list for any other role
 */
private List<? extends FirewallRuleVO> getLBRules(final DomainRouterJoinVO router) {
    if (router.getRole() == Role.VIRTUAL_ROUTER) {
        final SearchBuilder<LoadBalancerVO> lbSearch = _loadBalancerDao.createSearchBuilder();
        lbSearch.and("networkId", lbSearch.entity().getNetworkId(), SearchCriteria.Op.EQ);
        lbSearch.and("sourceIpAddressId", lbSearch.entity().getSourceIpAddressId(), SearchCriteria.Op.NNULL);

        final SearchCriteria<LoadBalancerVO> lbCriteria = lbSearch.create();
        lbCriteria.setParameters("networkId", router.getNetworkId());
        return _loadBalancerDao.search(lbCriteria, null);
    }

    if (router.getRole() == Role.INTERNAL_LB_VM) {
        final SearchBuilder<ApplicationLoadBalancerRuleVO> appLbSearch = applicationLoadBalancerRuleDao.createSearchBuilder();
        appLbSearch.and("networkId", appLbSearch.entity().getNetworkId(), SearchCriteria.Op.EQ);
        appLbSearch.and("sourceIpAddress", appLbSearch.entity().getSourceIp(), SearchCriteria.Op.NNULL);

        final SearchCriteria<ApplicationLoadBalancerRuleVO> appLbCriteria = appLbSearch.create();
        appLbCriteria.setParameters("networkId", router.getNetworkId());
        return applicationLoadBalancerRuleDao.search(appLbCriteria, null);
    }

    return Collections.emptyList();
}
protected class CheckRouterAlertsTask extends ManagedContextRunnable {
public CheckRouterAlertsTask() {
}
@ -1205,12 +1843,11 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
final List<DomainRouterVO> routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId);
s_logger.debug("Found " + routers.size() + " running routers. ");
for (final DomainRouterVO router : routers) {
final String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId());
// Skip the routers in VPC network or skip the routers where
// Monitor service is not enabled in the corresponding Zone
if (!Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) {
if (!Boolean.parseBoolean(serviceMonitoringFlag)) {
continue;
}
String controlIP = getRouterControlIP(router);
@ -1253,7 +1890,7 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
final String alerts[] = answer.getAlerts();
if (alerts != null) {
final String lastAlertTimeStamp = answer.getTimeStamp();
final SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
final SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
sdfrmt.setLenient(false);
try {
sdfrmt.parse(lastAlertTimeStamp);
@ -1667,19 +2304,7 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
if (reprogramGuestNtwks) {
finalizeIpAssocForNetwork(cmds, router, provider, guestNetworkId, null);
finalizeNetworkRulesForNetwork(cmds, router, provider, guestNetworkId);
final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(guestNetworkId).getNetworkOfferingId());
// service monitoring is currently not added in RVR
if (!offering.isRedundantRouter()) {
final String serviceMonitringSet = _configDao.getValue(Config.EnableServiceMonitoring.key());
if (serviceMonitringSet != null && serviceMonitringSet.equalsIgnoreCase("true")) {
finalizeMonitorServiceOnStrat(cmds, profile, router, provider, guestNetworkId, true);
} else {
finalizeMonitorServiceOnStrat(cmds, profile, router, provider, guestNetworkId, false);
}
}
finalizeMonitorService(cmds, profile, router, provider, guestNetworkId, true);
}
finalizeUserDataAndDhcpOnStart(cmds, router, provider, guestNetworkId);
@ -1692,31 +2317,38 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
return true;
}
private void finalizeMonitorServiceOnStrat(final Commands cmds, final VirtualMachineProfile profile, final DomainRouterVO router, final Provider provider,
final long networkId, final Boolean add) {
protected void finalizeMonitorService(final Commands cmds, final VirtualMachineProfile profile, final DomainRouterVO router, final Provider provider,
final long networkId, boolean onStart) {
final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(networkId).getNetworkOfferingId());
if (offering.isRedundantRouter()) {
// service monitoring is currently not added in RVR
return;
}
final String serviceMonitoringSet = _configDao.getValue(Config.EnableServiceMonitoring.key());
final Boolean isMonitoringServicesEnabled = serviceMonitoringSet != null && serviceMonitoringSet.equalsIgnoreCase("true");
final NetworkVO network = _networkDao.findById(networkId);
s_logger.debug("Creating monitoring services on " + router + " start...");
// get the list of services for this network to monitor
final List<MonitoringServiceVO> services = new ArrayList<MonitoringServiceVO>();
if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, Provider.VirtualRouter)
|| _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, Provider.VirtualRouter)) {
if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, provider)
|| _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, provider)) {
final MonitoringServiceVO dhcpService = _monitorServiceDao.getServiceByName(MonitoringService.Service.Dhcp.toString());
if (dhcpService != null) {
services.add(dhcpService);
}
}
if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, Provider.VirtualRouter)) {
if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, provider)) {
final MonitoringServiceVO lbService = _monitorServiceDao.getServiceByName(MonitoringService.Service.LoadBalancing.toString());
if (lbService != null) {
services.add(lbService);
}
}
final List<MonitoringServiceVO> defaultServices = _monitorServiceDao.listDefaultServices(true);
services.addAll(defaultServices);
services.addAll(getDefaultServicesToMonitor(network));
final List<MonitorServiceTO> servicesTO = new ArrayList<MonitorServiceTO>();
for (final MonitoringServiceVO service : services) {
@ -1734,17 +2366,21 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
if (controlNic == null) {
throw new CloudRuntimeException("VirtualMachine " + profile.getInstanceName() + " doesn't have a control interface");
}
final SetMonitorServiceCommand command = new SetMonitorServiceCommand(servicesTO);
command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlNic.getIPv4Address());
command.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, _routerControlHelper.getRouterIpInNetwork(networkId, router.getId()));
command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
if (!add) {
command.setAccessDetail(NetworkElementCommand.ROUTER_MONITORING_ENABLE, add.toString());
// As part of aggregate command we don't need to reconfigure if onStart and persist in processed cache. Subsequent updates are not needed.
SetMonitorServiceCommand command = createMonitorServiceCommand(router, servicesTO, !onStart, false);
command.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, _routerControlHelper.getRouterIpInNetwork(networkId, router.getId()));
if (!isMonitoringServicesEnabled) {
command.setAccessDetail(SetMonitorServiceCommand.ROUTER_MONITORING_ENABLED, isMonitoringServicesEnabled.toString());
}
cmds.addCommand("monitor", command);
}
/**
 * Returns the default monitoring services as listed by the monitoring service DAO.
 * Subclasses may override this to pick a different set for a given network.
 *
 * @param network network being configured; not consulted by this implementation
 * @return the result of {@code _monitorServiceDao.listDefaultServices(true)}
 */
protected List<MonitoringServiceVO> getDefaultServicesToMonitor(final NetworkVO network) {
    final List<MonitoringServiceVO> defaultServices = _monitorServiceDao.listDefaultServices(true);
    return defaultServices;
}
protected NicProfile getControlNic(final VirtualMachineProfile profile) {
final DomainRouterVO router = _routerDao.findById(profile.getId());
final DataCenterVO dcVo = _dcDao.findById(router.getDataCenterId());
@ -2599,7 +3235,22 @@ Configurable, StateListener<VirtualMachine.State, VirtualMachine.Event, VirtualM
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] { UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval };
return new ConfigKey<?>[] {
UseExternalDnsServers,
RouterVersionCheckEnabled,
SetServiceMonitor,
RouterAlertsCheckInterval,
RouterHealthChecksEnabled,
RouterHealthChecksBasicInterval,
RouterHealthChecksAdvancedInterval,
RouterHealthChecksConfigRefreshInterval,
RouterHealthChecksResultFetchInterval,
RouterHealthChecksFailuresToRecreateVr,
RouterHealthChecksToExclude,
RouterHealthChecksFreeDiskSpaceThreshold,
RouterHealthChecksMaxCpuUsageThreshold,
RouterHealthChecksMaxMemoryUsageThreshold
};
}
@Override

View File

@ -18,6 +18,7 @@ package com.cloud.network.router;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
@ -26,6 +27,9 @@ import java.util.Map;
import javax.inject.Inject;
import javax.naming.ConfigurationException;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.Command.OnError;
@ -34,6 +38,7 @@ import com.cloud.agent.api.PlugNicCommand;
import com.cloud.agent.api.SetupGuestNetworkCommand;
import com.cloud.agent.api.routing.AggregationControlCommand;
import com.cloud.agent.api.routing.AggregationControlCommand.Action;
import com.cloud.agent.api.to.VirtualMachineTO;
import com.cloud.agent.manager.Commands;
import com.cloud.dc.DataCenter;
import com.cloud.deploy.DeployDestination;
@ -42,7 +47,11 @@ import com.cloud.exception.ConcurrentOperationException;
import com.cloud.exception.InsufficientCapacityException;
import com.cloud.exception.OperationTimedoutException;
import com.cloud.exception.ResourceUnavailableException;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.HypervisorGuru;
import com.cloud.hypervisor.HypervisorGuruManager;
import com.cloud.network.IpAddress;
import com.cloud.network.MonitoringService;
import com.cloud.network.Network;
import com.cloud.network.Network.Provider;
import com.cloud.network.Network.Service;
@ -54,6 +63,8 @@ import com.cloud.network.Site2SiteVpnConnection;
import com.cloud.network.VirtualRouterProvider;
import com.cloud.network.addr.PublicIp;
import com.cloud.network.dao.IPAddressVO;
import com.cloud.network.dao.MonitoringServiceVO;
import com.cloud.network.dao.NetworkVO;
import com.cloud.network.dao.RemoteAccessVpnVO;
import com.cloud.network.vpc.NetworkACLItemDao;
import com.cloud.network.vpc.NetworkACLItemVO;
@ -72,6 +83,9 @@ import com.cloud.network.vpc.dao.PrivateIpDao;
import com.cloud.network.vpc.dao.StaticRouteDao;
import com.cloud.network.vpc.dao.VpcGatewayDao;
import com.cloud.network.vpn.Site2SiteVpnManager;
import com.cloud.service.ServiceOfferingVO;
import com.cloud.template.VirtualMachineTemplate;
import com.cloud.user.Account;
import com.cloud.user.UserStatisticsVO;
import com.cloud.utils.Pair;
import com.cloud.utils.db.EntityManager;
@ -87,14 +101,8 @@ import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.VirtualMachineProfile;
import com.cloud.vm.VirtualMachineProfile.Param;
import com.cloud.vm.VirtualMachineProfileImpl;
import com.cloud.vm.dao.VMInstanceDao;
import com.cloud.agent.api.to.VirtualMachineTO;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.HypervisorGuru;
import com.cloud.hypervisor.HypervisorGuruManager;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
@Component
public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplianceManagerImpl implements VpcVirtualNetworkApplianceManager {
@ -151,8 +159,9 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian
result = false;
}
// 3) apply networking rules
if (result && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true) {
sendNetworkRulesToRouter(router.getId(), network.getId());
if (result) {
boolean reprogramNetwork = params != null && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true;
sendNetworkRulesToRouter(router.getId(), network.getId(), reprogramNetwork);
}
} catch (final Exception ex) {
s_logger.warn("Failed to add router " + router + " to network " + network + " due to ", ex);
@ -454,19 +463,25 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian
throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString());
}
if (reprogramGuestNtwks && publicNics.size() > 0) {
finalizeMonitorService(cmds, profile, domainRouterVO, provider, publicNics.get(0).second().getId(), true);
}
for (final Pair<Nic, Network> nicNtwk : guestNics) {
final Nic guestNic = nicNtwk.first();
final long guestNetworkId = guestNic.getNetworkId();
final AggregationControlCommand startCmd = new AggregationControlCommand(Action.Start, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork(
guestNic.getNetworkId(), domainRouterVO.getId()));
guestNetworkId, domainRouterVO.getId()));
cmds.addCommand(startCmd);
if (reprogramGuestNtwks) {
finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId(), vlanMacAddress);
finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId());
finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNetworkId, vlanMacAddress);
finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNetworkId);
finalizeMonitorService(cmds, profile, domainRouterVO, provider, guestNetworkId, true);
}
finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNic.getNetworkId());
finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNetworkId);
final AggregationControlCommand finishCmd = new AggregationControlCommand(Action.Finish, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork(
guestNic.getNetworkId(), domainRouterVO.getId()));
guestNetworkId, domainRouterVO.getId()));
cmds.addCommand(finishCmd);
}
@ -476,6 +491,14 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian
return true;
}
/**
 * Chooses the default services to monitor for a VPC router network.
 * For a network whose traffic type is {@code Public}, only the SSH monitoring
 * service is returned; every other network falls back to the parent implementation.
 *
 * @param network network whose traffic type decides which defaults apply
 * @return list of services to monitor; never contains {@code null} elements and is
 *         empty when the SSH service cannot be found for a public network
 */
@Override
protected List<MonitoringServiceVO> getDefaultServicesToMonitor(NetworkVO network) {
    if (network.getTrafficType() != TrafficType.Public) {
        return super.getDefaultServicesToMonitor(network);
    }
    final List<MonitoringServiceVO> services = new ArrayList<MonitoringServiceVO>();
    final MonitoringServiceVO sshService = _monitorServiceDao.getServiceByName(MonitoringService.Service.Ssh.toString());
    // Same guard the DHCP and LB lookups use: a missing service must not end up
    // as a null element that callers later dereference while iterating.
    if (sshService != null) {
        services.add(sshService);
    }
    return services;
}
@Override
protected void finalizeNetworkRulesForNetwork(final Commands cmds, final DomainRouterVO domainRouterVO, final Provider provider, final Long guestNetworkId) {
@ -495,7 +518,7 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian
}
}
protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId) throws ResourceUnavailableException {
protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId, final boolean reprogramNetwork) throws ResourceUnavailableException {
final DomainRouterVO router = _routerDao.findById(routerId);
final Commands cmds = new Commands(OnError.Continue);
@ -508,10 +531,26 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian
throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString());
}
finalizeNetworkRulesForNetwork(cmds, router, provider, networkId);
if (reprogramNetwork) {
finalizeNetworkRulesForNetwork(cmds, router, provider, networkId);
}
finalizeMonitorService(cmds, getVirtualMachineProfile(router), router, provider, networkId, false);
return _nwHelper.sendCommandsToRouter(router, cmds);
}
/**
 * Builds a {@link VirtualMachineProfile} for the given router from its service
 * offering, template and owning account, and attaches every NIC profile that the
 * network manager reports for the router.
 *
 * @param router router to describe
 * @return profile populated with the router's NIC profiles
 */
private VirtualMachineProfile getVirtualMachineProfile(DomainRouterVO router) {
    final ServiceOfferingVO routerOffering = _serviceOfferingDao.findById(router.getId(), router.getServiceOfferingId());
    // the template is looked up including removed entries
    final VirtualMachineTemplate routerTemplate = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, router.getTemplateId());
    final Account routerOwner = _entityMgr.findById(Account.class, router.getAccountId());

    final VirtualMachineProfileImpl routerProfile =
            new VirtualMachineProfileImpl(router, routerTemplate, routerOffering, routerOwner, null);
    for (final NicProfile nicProfile : _networkMgr.getNicProfiles(router)) {
        routerProfile.addNic(nicProfile);
    }
    return routerProfile;
}
/**
* @param router
* @param add

View File

@ -46,6 +46,7 @@ import org.apache.cloudstack.api.command.admin.vpc.UpdateVPCOfferingCmd;
import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd;
import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd;
import org.apache.cloudstack.api.command.user.vpc.ListVPCOfferingsCmd;
import org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd;
import org.apache.cloudstack.context.CallContext;
import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
@ -1697,16 +1698,21 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
return success;
}
@Override
@ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc")
public boolean restartVpc(final long vpcId, final boolean cleanUp, final boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException,
public boolean restartVpc(final RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
final Account callerAccount = CallContext.current().getCallingAccount();
final long vpcId = cmd.getId();
final boolean cleanUp = cmd.getCleanup();
final boolean makeRedundant = cmd.getMakeredundant();
final User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId());
final ReservationContext context = new ReservationContextImpl(null, null, callerUser, callerAccount);
return restartVpc(vpcId, cleanUp, makeRedundant, callerUser);
}
// Verify input parameters
@Override
@ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc")
public boolean restartVpc(Long vpcId, boolean cleanUp, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
Vpc vpc = getActiveVpc(vpcId);
if (vpc == null) {
final InvalidParameterValueException ex = new InvalidParameterValueException("Unable to find Enabled VPC by id specified");
@ -1714,6 +1720,8 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis
throw ex;
}
Account callerAccount = _accountMgr.getActiveAccountById(user.getAccountId());
final ReservationContext context = new ReservationContextImpl(null, null, user, callerAccount);
_accountMgr.checkAccess(callerAccount, null, false, vpc);
s_logger.debug("Restarting VPC " + vpc);

View File

@ -171,6 +171,7 @@ import org.apache.cloudstack.api.command.admin.router.ConfigureOvsElementCmd;
import org.apache.cloudstack.api.command.admin.router.ConfigureVirtualRouterElementCmd;
import org.apache.cloudstack.api.command.admin.router.CreateVirtualRouterElementCmd;
import org.apache.cloudstack.api.command.admin.router.DestroyRouterCmd;
import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd;
import org.apache.cloudstack.api.command.admin.router.ListOvsElementsCmd;
import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd;
import org.apache.cloudstack.api.command.admin.router.ListVirtualRouterElementsCmd;
@ -3115,6 +3116,7 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe
cmdList.add(ListMgmtsCmd.class);
cmdList.add(GetUploadParamsForIsoCmd.class);
cmdList.add(ListTemplateOVFProperties.class);
cmdList.add(GetRouterHealthCheckResultsCmd.class);
// Out-of-band management APIs for admins
cmdList.add(EnableOutOfBandManagementForHostCmd.class);

View File

@ -16,17 +16,16 @@
// under the License.
package com.cloud.keystore;
import junit.framework.TestCase;
import org.apache.cloudstack.api.response.AlertResponse;
import org.apache.cloudstack.api.response.UserVmResponse;
import org.apache.log4j.Logger;
import org.junit.After;
import org.junit.Before;
import org.apache.cloudstack.api.response.AlertResponse;
import org.apache.cloudstack.api.response.UserVmResponse;
import com.cloud.api.ApiSerializerHelper;
import junit.framework.TestCase;
public class KeystoreTest extends TestCase {
private final static Logger s_logger = Logger.getLogger(KeystoreTest.class);

View File

@ -213,11 +213,16 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches
return false;
}
/**
 * Mock stub: ignores all arguments, performs no restart and always returns {@code false}.
 */
@Override
public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException {
return false;
}
/* (non-Javadoc)
* @see com.cloud.network.NetworkService#restartNetwork(com.cloud.api.commands.RestartNetworkCmd, boolean)
*/
@Override
public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException,
public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException,
InsufficientCapacityException {
// TODO Auto-generated method stub
return false;

View File

@ -248,6 +248,11 @@ public class MockVpcVirtualNetworkApplianceManager extends ManagerBase implement
return null; //To change body of implemented methods use File | Settings | File Templates.
}
/**
 * Mock stub: runs no health checks and always returns {@code false}.
 */
@Override
public boolean performRouterHealthChecks(long routerId) {
return false;
}
@Override
public boolean prepareAggregatedExecution(final Network network, final List<DomainRouterVO> routers) throws AgentUnavailableException {
return true; //To change body of implemented methods use File | Settings | File Templates.

View File

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
/var/log/monitor.log {
rotate 5
maxsize 10M
missingok
notifempty
compress
copytruncate
}

View File

@ -0,0 +1,24 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
/var/log/routerServiceMonitor.log {
rotate 5
maxsize 10M
missingok
notifempty
compress
copytruncate
}

View File

@ -17,27 +17,67 @@
import logging
from cs.CsDatabag import CsDataBag
from CsFile import CsFile
import json
MON_CONFIG = "/etc/monitor.conf"
HC_CONFIG = "/root/health_checks_data.json"
class CsMonitor(CsDataBag):
""" Manage dhcp entries """
""" Manage Monitor script schedule and health checks for router """
def process(self):
if "config" not in self.dbag:
return
procs = [x.strip() for x in self.dbag['config'].split(',')]
file = CsFile(MON_CONFIG)
for proc in procs:
bits = [x for x in proc.split(':')]
if len(bits) < 5:
continue
for i in range(0, 4):
file.add(bits[i], -1)
file.commit()
def get_basic_check_interval(self):
    """Return the basic health check run interval from the data bag.

    Falls back to 3 when the data bag has no
    "health_checks_basic_run_interval" entry.
    """
    key = "health_checks_basic_run_interval"
    if key in self.dbag:
        return self.dbag[key]
    return 3
def get_advanced_check_interval(self):
    """Return the advanced health check run interval from the data bag.

    Falls back to 0 when the data bag has no
    "health_checks_advanced_run_interval" entry.
    """
    key = "health_checks_advanced_run_interval"
    if key in self.dbag:
        return self.dbag[key]
    return 0
def setupMonitorConfigFile(self):
    """Write the monitored-process definitions from the data bag to MON_CONFIG.

    The "config" entry is a comma-separated list of colon-separated records.
    Records with fewer than five fields are ignored; for the rest, the first
    four fields are each added to the file. Nothing is written when the data
    bag has no "config" entry.
    """
    if "config" not in self.dbag:
        return

    records = [record.strip() for record in self.dbag['config'].split(',')]
    monitor_conf = CsFile(MON_CONFIG)
    for record in records:
        fields = record.split(':')
        if len(fields) < 5:
            continue
        for field in fields[:4]:
            monitor_conf.add(field, -1)
    monitor_conf.commit()
def setupHealthCheckCronJobs(self):
cron_rep_basic = self.get_basic_check_interval()
cron_rep_advanced = self.get_advanced_check_interval()
cron = CsFile("/etc/cron.d/process")
cron.deleteLine("root /usr/bin/python /root/monitorServices.py")
cron.add("SHELL=/bin/bash", 0)
cron.add("PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin", 1)
cron.add("*/3 * * * * root /usr/bin/python /root/monitorServices.py", -1)
if cron_rep_basic > 0:
cron.add("*/" + str(cron_rep_basic) + " * * * * root /usr/bin/python /root/monitorServices.py basic", -1)
if cron_rep_advanced > 0:
cron.add("*/" + str(cron_rep_advanced) + " * * * * root /usr/bin/python /root/monitorServices.py advanced", -1)
cron.commit()
def setupHealthChecksConfigFile(self):
    """Dump the health check settings from the data bag to HC_CONFIG as JSON.

    Written keys: both run intervals, the enabled flag (False when absent),
    the excluded check names (comma-separated string split into a list, empty
    list when absent or empty) and the per-check config (empty dict when absent).
    """
    bag = self.dbag
    settings = {}
    settings["health_checks_basic_run_interval"] = self.get_basic_check_interval()
    settings["health_checks_advanced_run_interval"] = self.get_advanced_check_interval()

    if "health_checks_enabled" in bag:
        settings["health_checks_enabled"] = bag["health_checks_enabled"]
    else:
        settings["health_checks_enabled"] = False

    excluded_names = []
    if "excluded_health_checks" in bag:
        raw_excluded = bag["excluded_health_checks"]
        if len(raw_excluded) > 0:
            excluded_names = [name.strip() for name in raw_excluded.split(",")]
    settings["excluded_health_checks"] = excluded_names

    if "health_checks_config" in bag:
        settings["health_checks_config"] = bag["health_checks_config"]
    else:
        settings["health_checks_config"] = {}

    with open(HC_CONFIG, 'w') as config_file:
        json.dump(settings, config_file, ensure_ascii=False, indent=4)
# Entry point for this data bag: writes the monitor config file first, then the
# health check JSON config, then the health check cron entries.
def process(self):
self.setupMonitorConfigFile()
self.setupHealthChecksConfigFile()
self.setupHealthCheckCronJobs()

View File

@ -22,4 +22,15 @@ def merge(dbag, data):
if "config" in data:
dbag['config'] = data["config"]
if "health_checks_enabled" in data:
dbag["health_checks_enabled"] = data["health_checks_enabled"]
if "health_checks_basic_run_interval" in data:
dbag["health_checks_basic_run_interval"] = data["health_checks_basic_run_interval"]
if "health_checks_advanced_run_interval" in data:
dbag["health_checks_advanced_run_interval"] = data["health_checks_advanced_run_interval"]
if "excluded_health_checks" in data:
dbag["excluded_health_checks"] = data["excluded_health_checks"]
if "health_checks_config" in data:
dbag["health_checks_config"] = data["health_checks_config"]
return dbag

View File

@ -0,0 +1,55 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# getRouterMonitorResults.sh --- Send the monitor results to Management Server
# Usage: getRouterMonitorResults.sh [true]
# When the first argument is exactly "true", run monitorServices.py before
# reporting; its stdout is discarded.
if [ "$1" == "true" ]
then
python /root/monitorServices.py > /dev/null
fi
# Section 1: names of failing checks, basic first, then advanced.
# Each file is optional and silently skipped when it does not exist.
# NOTE(review): the unquoted `cat` inside echo is word-split by the shell, so
# each file is emitted as a single line with whitespace runs collapsed, and any
# glob characters in the content would be expanded. Confirm the consumer
# expects this before changing it.
printf "FAILING CHECKS:\n"
if [ -f /root/basic_failing_health_checks ]
then
echo `cat /root/basic_failing_health_checks`
fi
if [ -f /root/advanced_failing_health_checks ]
then
echo `cat /root/advanced_failing_health_checks`
fi
# Section 2: one JSON object of the form {"basic": ..., "advanced": ...},
# assembled from the two result files; "{}" stands in for a missing file.
printf "MONITOR RESULTS:\n"
echo "{\"basic\":"
if [ -f /root/basic_monitor_results.json ]
then
echo `cat /root/basic_monitor_results.json`
else
echo "{}"
fi
echo ",\"advanced\":"
if [ -f /root/advanced_monitor_results.json ]
then
echo `cat /root/advanced_monitor_results.json`
else
echo "{}"
fi
echo "}"

View File

@ -301,6 +301,7 @@ class QueueFile:
if self.keep:
self.__moveFile(filename, self.configCache + "/processed")
else:
logging.debug("Processed file deleted: %s and not kept in /processed", filename)
os.remove(filename)
updateDataBag(self)

View File

@ -29,7 +29,8 @@ import json
logging.basicConfig(filename='/var/log/cloud.log', level=logging.INFO, format='%(asctime)s %(filename)s %(funcName)s:%(lineno)d %(message)s')
# first commandline argument should be the file to process
if (len(sys.argv) != 2):
argc = len(sys.argv)
if argc != 2 and argc != 3:
logging.error("Invalid usage, args passed: %s" % sys.argv)
sys.exit(1)
@ -49,6 +50,9 @@ def finish_config():
def process_file():
logging.info("Processing JSON file %s" % sys.argv[1])
qf = QueueFile()
if len(sys.argv) > 2 and sys.argv[2].lower() == "false":
qf.keep = False
qf.setFile(sys.argv[1])
qf.load(None)
# These can be safely deferred, dramatically speeding up loading times

View File

@ -0,0 +1,20 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Needed to expose utility as package outside for monitorServices.py.
# This directory should only contain executables for health checks.

View File

@ -0,0 +1,56 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path, statvfs
from subprocess import *
from utility import getHealthChecksData
def main():
    """Check current CPU usage against the configured maxCpuUsage threshold.

    Reads the threshold from the "systemThresholds" health check data and
    exits 0 (skipping) when it is missing. Otherwise samples CPU usage with
    top, prints a one-line result and exits 0 when usage is within the
    threshold, or 1 when it is exceeded or could not be determined.
    """
    entries = getHealthChecksData("systemThresholds")
    data = {}
    if entries is not None and len(entries) == 1:
        data = entries[0]

    if "maxCpuUsage" not in data:
        print("Missing maxCpuUsage in health_checks_data systemThresholds, skipping")
        exit(0)

    maxCpuUsage = float(data["maxCpuUsage"])
    # Takes the last "Cpu(s)" line of two top iterations and prints 100 - idle.
    cmd = "top -b -n2 -p 1 | fgrep \"Cpu(s)\" | tail -1 | " \
          "awk -F 'id,' " \
          "'{ split($1, vs, \",\"); idle=vs[length(vs)]; " \
          "sub(\"%\", \"\", idle); printf \"%.2f\", 100 - idle }'"
    pout = Popen(cmd, shell=True, stdout=PIPE)
    # communicate() drains stdout while waiting for the child; calling wait()
    # on a process with stdout=PIPE is the documented deadlock pattern.
    output = pout.communicate()[0]
    if pout.returncode != 0:
        print("Failed to retrieve cpu usage using " + cmd)
        exit(1)

    try:
        currentUsage = float(output.strip())
    except ValueError:
        # The pipeline exited 0 but produced nothing numeric
        # (e.g. no "Cpu(s)" line matched); report it as a failed check.
        print("Failed to retrieve cpu usage using " + cmd)
        exit(1)

    if currentUsage > maxCpuUsage:
        print("CPU Usage " + str(currentUsage) +
              "% has crossed threshold of " + str(maxCpuUsage) + "%")
        exit(1)
    print("CPU Usage within limits with current at "
          + str(currentUsage) + "%")
    exit(0)
if __name__ == "__main__":
if len(sys.argv) == 2 and sys.argv[1] == "basic":
main()

View File

@ -0,0 +1,69 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path
from utility import getHealthChecksData
def main():
    """Verify that every known VM has a matching entry in /etc/dhcphosts.txt.

    A VM matches a comma-separated line (of at least four fields) when the line
    starts with "mac,ip,vmName" or with "mac,set:<ip with dots as underscores>,ip,vmName".
    Exits 0 when no VM data is available or all VMs are present; prints the
    missing entries and exits 1 otherwise.
    """
    vMs = getHealthChecksData("virtualMachines")
    if vMs is None or len(vMs) == 0:
        print("No VMs running data available, skipping")
        exit(0)

    with open('/etc/dhcphosts.txt', 'r') as hostsFile:
        rawLines = hostsFile.readlines()

    # Split each line once up front; lines with fewer than four fields never match.
    hostRows = []
    for rawLine in rawLines:
        fields = [field.strip() for field in rawLine.strip().split(',')]
        if len(fields) >= 4:
            hostRows.append(fields)

    missingEntries = []
    for vM in vMs:
        entry = vM["macAddress"] + " " + vM["ip"] + " " + vM["vmName"]
        defaultRow = [vM["macAddress"], vM["ip"], vM["vmName"]]
        taggedRow = [vM["macAddress"], "set:" + vM["ip"].replace(".", "_"),
                     vM["ip"], vM["vmName"]]
        present = any(row[:3] == defaultRow or row[:4] == taggedRow
                      for row in hostRows)
        if not present:
            missingEntries.append(entry)

    if missingEntries:
        print("Missing elements in dhcphosts.txt - \n" + ", ".join(missingEntries))
        exit(1)
    else:
        print("All " + str(len(vMs)) + " VMs are present in dhcphosts.txt")
        exit(0)
if __name__ == "__main__":
if len(sys.argv) == 2 and sys.argv[1] == "advanced":
main()

View File

@ -0,0 +1,47 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path, statvfs
from utility import getHealthChecksData
def main():
    """
    Compares the space available on '/' with the "minDiskNeeded" threshold
    from the "systemThresholds" health check data.

    The threshold is multiplied by 1024 and compared against the available
    bytes divided by 1024; the printed figure divides by 1024 once more and
    is labelled MB.

    Exits with 0 when the threshold is not configured or enough space is
    available, and with 1 when the available space is below the threshold.
    """
    entries = getHealthChecksData("systemThresholds")
    # Only a single thresholds entry is accepted; anything else counts as
    # "not configured".
    data = entries[0] if entries is not None and len(entries) == 1 else {}

    if "minDiskNeeded" not in data:
        print("Missing minDiskNeeded in health_checks_data systemThresholds, skipping")
        exit(0)

    minDiskNeeded = float(data["minDiskNeeded"]) * 1024
    rootFs = statvfs('/')
    freeSpace = (rootFs.f_bavail * rootFs.f_frsize) / 1024
    freeSpaceText = str(freeSpace / 1024) + " MB"

    if freeSpace < minDiskNeeded:
        print("Insufficient free space is " + freeSpaceText)
        exit(1)

    print("Sufficient free space is " + freeSpaceText)
    exit(0)


if __name__ == "__main__":
    # This check only runs as part of the "basic" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "basic":
        main()

View File

@ -0,0 +1,59 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path
from utility import getHealthChecksData
def main():
    """
    Checks that every VM from the "virtualMachines" health check data has an
    "<ip><TAB><vmName>" line in /etc/hosts.

    Only lines that split into exactly two tab separated parts are
    considered; both parts are compared after stripping whitespace.

    Exits with 0 when all VMs are found (or when there is no VM data to
    check), and with 1 when at least one VM is missing.
    """
    vMs = getHealthChecksData("virtualMachines")
    if vMs is None or len(vMs) == 0:
        print("No VMs running data available, skipping")
        exit(0)

    with open('/etc/hosts', 'r') as hostsFile:
        rawLines = hostsFile.readlines()

    # Reduce the file to [ip, name] pairs once instead of re-splitting every
    # line for every VM.
    knownPairs = []
    for rawLine in rawLines:
        parts = rawLine.split('\t')
        if len(parts) == 2:
            knownPairs.append([parts[0].strip(), parts[1].strip()])

    missingVms = []
    for vM in vMs:
        if [vM["ip"], vM["vmName"]] not in knownPairs:
            missingVms.append(vM["ip"] + " " + vM["vmName"])

    if missingVms:
        print("Missing entries for VMs in /etc/hosts -\n" + ", ".join(missingVms))
        exit(1)

    print("All " + str(len(vMs)) + " VMs are present in /etc/hosts")
    exit(0)


if __name__ == "__main__":
    # This check only runs as part of the "advanced" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "advanced":
        main()

View File

@ -0,0 +1,57 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path
from subprocess import *
from utility import getHealthChecksData
def main():
    """
    Pings every gateway listed in the "gatewaysIps" value of the first
    "gateways" health check data entry.

    "gatewaysIps" is split on single spaces and empty items are ignored.
    Each gateway gets up to 5 ping attempts ("ping <gw> -c 5 -w 10") and is
    considered reachable as soon as one attempt exits with status 0.

    Exits with 0 when all gateways are reachable (or when there is no
    gateway data to check), and with 1 when at least one is unreachable.
    """
    gws = getHealthChecksData("gateways")
    # "or", not "and": a missing or empty list must both be skipped here
    # rather than failing on len(None) or gws[0] below.
    if gws is None or len(gws) == 0 or "gatewaysIps" not in gws[0]:
        print("No gateways data available, skipping")
        exit(0)

    gwsList = [gw for gw in gws[0]["gatewaysIps"].strip().split(' ') if len(gw) > 0]

    unreachableGateWays = []
    for gw in gwsList:
        reachableGw = False
        for _ in range(5):
            # An argument list keeps the gateway value away from the shell.
            pout = Popen(["ping", gw, "-c", "5", "-w", "10"], stdout=PIPE)
            # communicate() drains the pipe while waiting, so the child can
            # never block on a full stdout pipe.
            pout.communicate()
            if pout.returncode == 0:
                reachableGw = True
                break
        if not reachableGw:
            unreachableGateWays.append(gw)

    if len(unreachableGateWays) == 0:
        # Report the number of gateways that were actually pinged.
        print("All " + str(len(gwsList)) + " gateways are reachable via ping")
        exit(0)
    else:
        print("Unreachable gateways found-")
        print(unreachableGateWays)
        exit(1)


if __name__ == "__main__":
    # This check only runs as part of the "basic" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "basic":
        main()

View File

@ -0,0 +1,134 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path
from utility import getHealthChecksData, formatPort
def checkMaxconn(haproxyData, haCfgSections):
    """
    Compares the expected "maxconn" value with the one in the "global"
    section of the parsed haproxy configuration.

    :param haproxyData: dict of expected values; "maxconn" is optional
    :param haCfgSections: dict mapping a section header to a dict of
                          keyword -> list of values found in that section
    :return: False (after printing a message) when both sides define
             "maxconn" and the values differ, True otherwise - including
             when the configuration has no "global" section at all
    """
    # A configuration without a "global" section has nothing to compare
    # against, the same as a "global" section without "maxconn".
    globalSection = haCfgSections.get("global", {})
    if "maxconn" in haproxyData and "maxconn" in globalSection:
        if haproxyData["maxconn"] != globalSection["maxconn"][0].strip():
            print("global maxconn mismatch occured")
            return False
    return True
def checkLoadBalance(haproxyData, haCfgSections):
    """
    Verifies each expected load balancing rule against the parsed haproxy
    configuration.

    For every rule the section "listen <sourceIp with underscores>-<ports>"
    must exist. When that section has "server" lines, the balance method,
    the bind string, the http mode (where expected) and one server line per
    expected VM ip are checked. Every mismatch is printed.

    :param haproxyData: list of dicts describing the expected rules
    :param haCfgSections: dict mapping a section header to a dict of
                          keyword -> list of values found in that section
    :return: True when no mismatch was found, False otherwise
    """
    correct = True
    for lbSec in haproxyData:
        srcPorts = formatPort(lbSec["sourcePortStart"], lbSec["sourcePortEnd"])
        srcServer = lbSec["sourceIp"].replace('.', '_') + "-" + srcPorts
        secName = "listen " + srcServer

        if secName not in haCfgSections:
            print("Missing section for load balancing " + secName + "\n")
            correct = False
            continue

        cfgSection = haCfgSections[secName]
        if "server" not in cfgSection:
            # Sections without server lines are not inspected any further.
            continue

        # A section may lack "balance" or "bind"; report that as a mismatch
        # instead of failing with a KeyError.
        foundBalance = cfgSection.get("balance", [""])[0]
        if lbSec["algorithm"] != foundBalance:
            print("Incorrect balance method for " + secName +
                  ". Expected : " + lbSec["algorithm"] +
                  " but found " + foundBalance + "\n")
            correct = False

        bindStr = lbSec["sourceIp"] + ":" + srcPorts
        foundBind = cfgSection.get("bind", [""])[0]
        if foundBind != bindStr:
            print("Incorrect bind string found. Expected " + bindStr + " but found " + foundBind + ".")
            correct = False

        httpModeExpected = \
            (lbSec["sourcePortStart"] == "80" and lbSec["sourcePortEnd"] == "80" and lbSec["keepAliveEnabled"] == "false") \
            or (lbSec["stickiness"].find("AppCookie") != -1 or lbSec["stickiness"].find("LbCookie") != -1)
        if httpModeExpected:
            if not ("mode" in cfgSection and cfgSection["mode"][0] == "http"):
                print("Expected HTTP mode but not found")
                correct = False

        destPorts = formatPort(lbSec["destPortStart"], lbSec["destPortEnd"])
        for expectedServerIp in lbSec["vmIps"].split(" "):
            pattern = expectedServerIp + ":" + destPorts
            foundPattern = False
            for server in cfgSection["server"]:
                tokens = server.split()
                # Server lines with fewer than two tokens cannot match.
                if len(tokens) >= 2 and tokens[0].startswith(srcServer + "_") \
                        and tokens[1] == pattern:
                    foundPattern = True
                    break
            if not foundPattern:
                correct = False
                print("Missing load balancing for " + pattern + ". ")

    return correct
def main():
    '''
    Checks for max con and each load balancing rule - source ip, ports and destination
    ips and ports. Also checks for http mode. Does not check for stickiness policies.

    /etc/haproxy/haproxy.cfg is parsed into sections: the first non-empty
    line after a blank line (or the start of the file) is the section
    header, and each following line is stored as keyword -> list of the
    remaining text. A blank line or the end of the file closes the section.

    Exits with 0 when all checks pass (or when there is no data to check)
    and with 1 otherwise.
    '''
    haproxyData = getHealthChecksData("haproxyData")
    if haproxyData is None or len(haproxyData) == 0:
        print("No data provided to check, skipping")
        exit(0)

    with open("/etc/haproxy/haproxy.cfg", 'r') as haCfgFile:
        haCfgLines = haCfgFile.readlines()

    if len(haCfgLines) == 0:
        print("Unable to read config file /etc/haproxy/haproxy.cfg")
        exit(1)

    haCfgSections = {}
    currSection = None
    currSectionDict = {}
    for line in haCfgLines:
        line = line.strip()
        if len(line) == 0:
            if currSection is not None and len(currSectionDict) > 0:
                haCfgSections[currSection] = currSectionDict
            currSection = None
            currSectionDict = {}
            continue

        if currSection is None:
            currSection = line
        else:
            lineSec = line.split(' ', 1)
            if lineSec[0] not in currSectionDict:
                currSectionDict[lineSec[0]] = []
            currSectionDict[lineSec[0]].append(lineSec[1] if len(lineSec) > 1 else '')

    # Store the last section too: without this it would be lost whenever the
    # file does not end with a blank line.
    if currSection is not None and len(currSectionDict) > 0:
        haCfgSections[currSection] = currSectionDict

    checkMaxConn = checkMaxconn(haproxyData[0], haCfgSections)
    checkLbRules = checkLoadBalance(haproxyData, haCfgSections)

    if checkMaxConn and checkLbRules:
        print("All checks pass")
        exit(0)
    else:
        exit(1)


if __name__ == "__main__":
    # This check only runs as part of the "advanced" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "advanced":
        main()

View File

@ -0,0 +1,81 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path
from subprocess import *
from utility import getHealthChecksData, formatPort
def main():
    """
    Checks that every rule from the "portForwarding" health check data is
    present in the output of iptables-save.

    For each rule three entries are expected (PREROUTING and OUTPUT with
    "--to-destination", POSTROUTING with "--to-source"). An entry is found
    when a single iptables-save line, pre-filtered by grep on the
    destination ip, contains all of its expected text fragments.

    Exits with 0 when all entries are found (or when there are no rules to
    check), and with 1 when an entry is missing or the command fails.
    """
    portForwards = getHealthChecksData("portForwarding")
    if portForwards is None or len(portForwards) == 0:
        print("No portforwarding rules provided to check, skipping")
        exit(0)

    failedCheck = False
    failureMessage = "Missing port forwarding rules in Iptables-\n "
    for portForward in portForwards:
        entriesExpected = []
        destIp = portForward["destIp"]
        srcIpText = "-d " + portForward["sourceIp"]
        srcPortText = "--dport " + formatPort(portForward["sourcePortStart"], portForward["sourcePortEnd"], ":")
        dstText = destIp + ":" + formatPort(portForward["destPortStart"], portForward["destPortEnd"], "-")
        for algo in [["PREROUTING", "--to-destination"],
                     ["OUTPUT", "--to-destination"],
                     ["POSTROUTING", "--to-source"]]:
            entriesExpected.append([algo[0], srcIpText, srcPortText, algo[1] + " " + dstText])

        # NOTE(review): destIp is interpolated into a shell command and is
        # treated as a regular expression by grep - confirm the data source
        # only ever provides plain ip addresses.
        fetchIpTableEntriesCmd = "iptables-save | grep " + destIp
        pout = Popen(fetchIpTableEntriesCmd, shell=True, stdout=PIPE)
        # Read the output before looking at the exit status: waiting first
        # on a piped child can block once the pipe buffer is full.
        cmdOutput = pout.communicate()[0]
        if pout.returncode != 0:
            failedCheck = True
            failureMessage = failureMessage + "Unable to execute iptables-save command " \
                                              "for fetching rules by " + fetchIpTableEntriesCmd + "\n"
            continue

        ipTablesMatchingEntries = cmdOutput.strip().split('\n')
        for pfEntryListExpected in entriesExpected:
            foundPfEntryList = False
            for ipTableEntry in ipTablesMatchingEntries:
                # Check if all expected parts of pfEntryList
                # is present in this ipTableEntry
                foundAll = True
                for expectedEntry in pfEntryListExpected:
                    if ipTableEntry.find(expectedEntry) == -1:
                        foundAll = False
                        break

                if foundAll:
                    foundPfEntryList = True
                    break

            if not foundPfEntryList:
                failedCheck = True
                failureMessage = failureMessage + str(pfEntryListExpected) + "\n"

    if failedCheck:
        print(failureMessage)
        exit(1)
    else:
        print("Found all entries (count " + str(len(portForwards)) + ") in iptables")
        exit(0)


if __name__ == "__main__":
    # This check only runs as part of the "advanced" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "advanced":
        main()

View File

@ -0,0 +1,55 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path, statvfs
from subprocess import *
from utility import getHealthChecksData
def main():
    """
    Compares the current memory usage percentage with the "maxMemoryUsage"
    threshold from the "systemThresholds" health check data.

    The usage is computed by awk from the second line of the output of
    "free", as column 3 * 100 / column 2.

    Exits with 0 when the threshold is not configured or the usage is not
    above it, and with 1 when the usage is above the threshold or the
    command fails.
    """
    entries = getHealthChecksData("systemThresholds")
    # Only a single thresholds entry is accepted; anything else counts as
    # "not configured".
    data = entries[0] if entries is not None and len(entries) == 1 else {}

    if "maxMemoryUsage" not in data:
        print("Missing maxMemoryUsage in health_checks_data " +
              "systemThresholds, skipping")
        exit(0)

    maxMemoryUsage = float(data["maxMemoryUsage"])
    cmd = "free | awk 'FNR == 2 { print $3 * 100 / $2 }'"
    pout = Popen(cmd, shell=True, stdout=PIPE)

    if pout.wait() != 0:
        print("Failed to retrieve memory usage using " + cmd)
        exit(1)

    currentUsage = float(pout.communicate()[0].strip())
    if currentUsage > maxMemoryUsage:
        print("Memory Usage " + str(currentUsage) +
              "% has crossed threshold of " + str(maxMemoryUsage) + "%")
        exit(1)

    print("Memory Usage within limits with current at " +
          str(currentUsage) + "%")
    exit(0)


if __name__ == "__main__":
    # This check only runs as part of the "basic" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "basic":
        main()

View File

@ -0,0 +1,83 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from os import sys, path, statvfs
from utility import getHealthChecksData
def getFirstLine(file=None):
    """
    Returns the first line of a file with surrounding whitespace removed.

    :param file: path of the file to read
    :return: the stripped first line, or None when no path is given, the
             path is not an existing regular file, or the file is empty
    """
    if file is None or not path.isfile(file):
        return None

    with open(file, 'r') as oFile:
        lines = oFile.readlines()

    return lines[0].strip() if len(lines) > 0 else None
def main():
    """
    Compares the expected template and scripts versions from the
    "routerVersion" health check data with what is found on the router.

    "templateVersion" is compared with the first line of
    /etc/cloudstack-release and "scriptsVersion" with the first line of
    /var/cache/cloud/cloud-scripts-signature. A file that cannot be read is
    reported and skipped; it does not count as a mismatch.

    Exits with 0 when nothing mismatches (or when there is no version data
    to check), and with 1 when at least one version differs.
    """
    entries = getHealthChecksData("routerVersion")
    data = {}
    if entries is not None and len(entries) == 1:
        data = entries[0]

    if len(data) == 0:
        print("Missing routerVersion in health_checks_data, skipping")
        exit(0)

    templateVersionMatches = True
    scriptVersionMatches = True

    if "templateVersion" in data:
        expected = data["templateVersion"].strip()
        releaseFile = "/etc/cloudstack-release"
        found = getFirstLine(releaseFile)
        if found is None:
            print("Release version not yet setup at " + releaseFile +
                  ", skipping.")
        elif expected != found:
            print("Template Version mismatch. Expected: " +
                  expected + ", found: " + found)
            templateVersionMatches = False

    if "scriptsVersion" in data:
        expected = data["scriptsVersion"].strip()
        sigFile = "/var/cache/cloud/cloud-scripts-signature"
        found = getFirstLine(sigFile)
        if found is None:
            print("Scripts signature is not yet setup at " + sigFile +
                  ", skipping")
        # "elif", matching the template branch: with a plain "if" a missing
        # signature file fell through and failed concatenating None.
        elif expected != found:
            print("Scripts Version mismatch. Expected: " +
                  expected + ", found: " + found)
            scriptVersionMatches = False

    if templateVersionMatches and scriptVersionMatches:
        print("Template and scripts version match successful")
        exit(0)
    else:
        exit(1)


if __name__ == "__main__":
    # This check only runs as part of the "basic" set of health checks.
    if len(sys.argv) == 2 and sys.argv[1] == "basic":
        main()

View File

@ -0,0 +1,19 @@
#!/usr/bin/python
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from sharedFunctions import getHealthChecksData, formatPort

View File

@ -0,0 +1,54 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import json
def getHealthChecksData(additionalDataKey=None):
    """
    Reads /root/health_checks_data.json and returns either the whole
    document or the parsed value of one key of its "health_checks_config".

    The value of a key is a string of the form "k1=v1,k2=v2;k1=v3,k2=v4":
    records are separated by ';' and the key=value pairs of a record by ','.
    Whitespace around records, pairs, keys and values is removed. Pairs that
    do not contain exactly one '=' and records without any valid pair are
    dropped.

    :param additionalDataKey: key inside "health_checks_config" to parse,
                              or None to get the complete JSON document
    :return: the complete JSON document when no key is requested; None when
             the document has no "health_checks_config" or it does not
             contain the key; otherwise a list with one dict per record
    """
    with open('/root/health_checks_data.json', 'r') as hc_data_file:
        hc_data = json.load(hc_data_file)

    # If no specific key is requested return all the data as JSON
    if additionalDataKey is None:
        return hc_data

    # A document without the config section is treated like a missing key
    # instead of failing with a KeyError.
    hc_config = hc_data.get("health_checks_config")
    if hc_config is None or additionalDataKey not in hc_config:
        return None

    data = hc_config[additionalDataKey].strip().split(";")
    addData = []
    for line in data:
        line = line.strip()
        if len(line) == 0:
            continue
        entries = line.split(',')
        d = {}
        for entry in entries:
            entry = entry.strip()
            if len(entry) == 0:
                continue
            keyVal = entry.split("=")
            if len(keyVal) == 2:
                d[keyVal[0].strip()] = keyVal[1].strip()
        if len(d) > 0:
            addData.append(d)

    return addData
def formatPort(portStart, portEnd, delim="-"):
    """
    Formats a port range as text.

    :param portStart: first port of the range
    :param portEnd: last port of the range
    :param delim: text placed between the two ports of a real range
    :return: portStart alone when both ports are equal, otherwise
             portStart + delim + portEnd
    """
    if portStart == portEnd:
        return portStart
    return portStart + delim + portEnd

View File

@ -16,16 +16,15 @@
# specific language governing permissions and limitations
# under the License.
from ConfigParser import SafeConfigParser
from subprocess import *
from os import path
from datetime import datetime
import time
import os
import logging
import json
from os import sys, path
from health_checks.utility import getHealthChecksData
class StatusCodes:
SUCCESS = 0
@ -42,15 +41,15 @@ class Log:
NOTIF = 'NOTIF'
class Config:
MONIT_AFTER_MINS = 30
SLEEP_SEC = 1
RETRY_ITERATIONS = 10
RETRY_FOR_RESTART = 5
MONITOR_LOG = '/var/log/monitor.log'
UNMONIT_PS_FILE = '/etc/unmonit_psList.txt'
HEALTH_CHECKS_DIR = 'health_checks'
MONITOR_RESULT_FILE_SUFFIX = 'monitor_results.json'
FAILING_CHECKS_FILE = 'failing_health_checks'
def getConfig( config_file_path = "/etc/monitor.conf" ):
def getServicesConfig( config_file_path = "/etc/monitor.conf" ):
"""
Reads the process configuration from the config file.
Config file contains the processes to be monitored.
@ -66,7 +65,7 @@ def getConfig( config_file_path = "/etc/monitor.conf" ):
for name, value in parser.items(section):
process_dict[section][name] = value
# printd (" %s = %r" % (name, value))
printd (" %s = %r" % (name, value))
return process_dict
@ -77,12 +76,12 @@ def printd (msg):
#for debug
#print msg
return 0
f= open(Config.MONITOR_LOG,'r+')
f= open(Config.MONITOR_LOG, 'w' if not path.isfile(Config.MONITOR_LOG) else 'r+')
f.seek(0, 2)
f.write(str(msg)+"\n")
f.close()
print str(msg)
def raisealert(severity, msg, process_name=None):
""" Writes the alert message"""
@ -97,6 +96,7 @@ def raisealert(severity, msg, process_name=None):
logging.info(log)
msg = 'logger -t monit '+ log
pout = Popen(msg, shell=True, stdout=PIPE)
print "[Alert] " + msg
def isPidMatchPidFile(pidfile, pids):
@ -126,7 +126,7 @@ def isPidMatchPidFile(pidfile, pids):
fd.close()
return StatusCodes.FAILED
printd("file content "+str(inp))
printd("file content of pidfile " + pidfile + " = " + str(inp).strip())
printd(pids)
tocheck_pid = inp.strip()
for item in pids:
@ -152,7 +152,7 @@ def checkProcessRunningStatus(process_name, pidFile):
#check there is only one pid or not
if exitStatus == 0:
pids = temp_out.split(' ')
pids = temp_out.strip().split(' ')
printd("pid(s) of process %s are %s " %(process_name, pids))
#there is more than one process so match the pid file
@ -181,11 +181,10 @@ def restartService(service_name):
return False
def checkProcessStatus( process ):
"""
Check the process running status, if not running tries to restart
Returns the process status and if it was restarted
"""
process_name = process.get('processname')
service_name = process.get('servicename')
@ -197,13 +196,13 @@ def checkProcessStatus( process ):
cmd=''
if process_name is None:
printd ("\n Invalid Process Name")
return StatusCodes.INVALID_INP
return StatusCodes.INVALID_INP, False
status, pids = checkProcessRunningStatus(process_name, pidfile)
if status == True:
printd("The process is running ....")
return StatusCodes.RUNNING
return StatusCodes.RUNNING, False
else:
printd("Process %s is not running trying to recover" %process_name)
#Retry the process state for few seconds
@ -243,138 +242,151 @@ def checkProcessStatus( process ):
raisealert(Log.ALERT,process_name,msg)
printd("Restart failed after number of retries")
return StatusCodes.STOPPED
return StatusCodes.STOPPED, False
return StatusCodes.RUNNING
return StatusCodes.RUNNING, True
def monitProcess( processes_info ):
"""
Monitors the processes which got from the config file
"""
checkStartTime = time.time()
service_status = {}
failing_services = []
if len( processes_info ) == 0:
printd("Invalid Input")
return StatusCodes.INVALID_INP
printd("No config items provided - means a redundant VR or a VPC Router")
return service_status, failing_services
dict_unmonit={}
umonit_update={}
unMonitPs=False
if not path.isfile(Config.UNMONIT_PS_FILE):
printd('Unmonit File not exist')
else:
#load the dictionary with unmonit process list
dict_unmonit = loadPsFromUnMonitFile()
print "[Process Info] " + json.dumps(processes_info)
#time for noting process down time
csec = repr(time.time()).split('.')[0]
for process,properties in processes_info.items():
#skip the process it its time stamp less than Config.MONIT_AFTER_MINS
printd ("checking the service %s \n" %process)
if not is_emtpy(dict_unmonit):
if dict_unmonit.has_key(process):
ts = dict_unmonit[process]
if checkPsTimeStampForMonitor (csec, ts, properties) == False:
unMonitPs = True
continue
if checkProcessStatus( properties) != StatusCodes.RUNNING:
printd ("---------------------------\nchecking the service %s\n---------------------------- " %process)
serviceName = process + ".service"
processStatus, wasRestarted = checkProcessStatus(properties)
if processStatus != StatusCodes.RUNNING:
printd( "\n Service %s is not Running"%process)
#add this process into unmonit list
printd ("updating the service for unmonit %s\n" %process)
umonit_update[process]=csec
checkEndTime = time.time()
service_status[serviceName] = {
"success": "false",
"lastUpdate": str(int(checkStartTime * 1000)),
"lastRunDuration": str((checkEndTime - checkStartTime) * 1000),
"message": "service down at last check " + str(csec)
}
failing_services.append(serviceName)
else:
checkEndTime = time.time()
service_status[serviceName] = {
"success": "true",
"lastUpdate": str(int(checkStartTime * 1000)),
"lastRunDuration": str((checkEndTime - checkStartTime) * 1000),
"message": "service is running" + (", was restarted" if wasRestarted else "")
}
#if dict is not empty write to file else delete it
if not is_emtpy(umonit_update):
writePsListToUnmonitFile(umonit_update)
return service_status, failing_services
def execute(script, checkType = "basic"):
checkStartTime = time.time()
cmd = "./" + script + " " + checkType
printd ("Executing health check script command: " + cmd)
pout = Popen(cmd, shell=True, stdout=PIPE)
exitStatus = pout.wait()
output = pout.communicate()[0].strip()
checkEndTime = time.time()
if exitStatus == 0:
if len(output) > 0:
printd("Successful execution of " + script)
return {
"success": "true",
"lastUpdate": str(int(checkStartTime * 1000)),
"lastRunDuration": str((checkEndTime - checkStartTime) * 1000),
"message": output
}
return {} #Skip script if no output is received
else:
if is_emtpy(umonit_update) and unMonitPs == False:
#delete file it is there
removeFile(Config.UNMONIT_PS_FILE)
printd("Script execution failed " + script)
return {
"success": "false",
"lastUpdate": str(int(checkStartTime * 1000)),
"lastRunDuration": str((checkEndTime - checkStartTime) * 1000),
"message": output
}
def checkPsTimeStampForMonitor(csec,ts, process):
printd("Time difference=%s" %str(int(csec) - int(ts)))
tmin = (int(csec) - int(ts) )/60
if ( int(csec) - int(ts) )/60 < Config.MONIT_AFTER_MINS:
raisealert(Log.ALERT, "The %s get monitor after %s minutes " %(process, Config.MONIT_AFTER_MINS))
printd('process will be monitored after %s min' %(str(int(Config.MONIT_AFTER_MINS) - tmin)))
return False
return True
def removeFile(fileName):
if path.isfile(fileName):
printd("Removing the file %s" %fileName)
os.remove(fileName)
def loadPsFromUnMonitFile():
dict_unmonit = {}
try:
fd = open(Config.UNMONIT_PS_FILE)
except:
printd("Failed to open file %s " %(Config.UNMONIT_PS_FILE))
return StatusCodes.FAILED
ps = fd.read()
if not ps:
printd("File %s content is empty " %Config.UNMONIT_PS_FILE)
return StatusCodes.FAILED
printd(ps)
plist = ps.split(',')
plist.remove('')
for i in plist:
dict_unmonit[i.split(':')[0]] = i.split(':')[1]
fd.close()
return dict_unmonit
def writePsListToUnmonitFile(umonit_update):
printd("Write updated unmonit list to file")
line=''
for i in umonit_update:
line+=str(i)+":"+str(umonit_update[i])+','
printd(line)
try:
fd=open(Config.UNMONIT_PS_FILE,'w')
except:
printd("Failed to open file %s " %Config.UNMONIT_PS_FILE)
return StatusCodes.FAILED
fd.write(line)
fd.close()
def is_emtpy(struct):
"""
Checks wether the given struct is empty or not
"""
if struct:
return False
else:
return True
def main():
def main(checkType = "basic"):
startTime = time.time()
'''
Step1 : Get Config
Step1 : Get Services Config
'''
printd("monitoring started")
temp_dict = getConfig()
configDict = getServicesConfig()
'''
Step2: Monitor and Raise Alert
Step2: Monitor services and Raise Alerts
'''
monitProcess( temp_dict )
monitResult = {}
failingChecks = []
if checkType == "basic":
monitResult, failingChecks = monitProcess(configDict)
'''
Step3: Run health check scripts as needed
'''
hc_data = getHealthChecksData()
if hc_data is not None and "health_checks_enabled" in hc_data and hc_data['health_checks_enabled']:
hc_exclude = hc_data["excluded_health_checks"] if "excluded_health_checks" in hc_data else []
for f in os.listdir(Config.HEALTH_CHECKS_DIR):
if f in hc_exclude:
continue
fpath = path.join(Config.HEALTH_CHECKS_DIR, f)
if path.isfile(fpath) and os.access(fpath, os.X_OK):
ret = execute(fpath, checkType)
if len(ret) == 0:
continue
if "success" in ret and ret["success"].lower() == "false":
failingChecks.append(f)
monitResult[f] = ret
'''
Step4: Write results to the json file for admins/management server to read
'''
endTime = time.time()
monitResult["lastRun"] = {
"start": str(datetime.fromtimestamp(startTime)),
"end": str(datetime.fromtimestamp(endTime)),
"duration": str(endTime - startTime)
}
with open(checkType + "_" + Config.MONITOR_RESULT_FILE_SUFFIX, 'w') as f:
json.dump(monitResult, f, ensure_ascii=False)
failChecksFile = checkType + "_" + Config.FAILING_CHECKS_FILE
if len(failingChecks) > 0:
fcs = ""
for fc in failingChecks:
fcs = fcs + fc + ","
fcs = fcs[0:-1]
with open(failChecksFile, 'w') as f:
f.write(fcs)
elif path.isfile(failChecksFile):
os.remove(failChecksFile)
if __name__ == "__main__":
main()
# Entry statements: a single "basic" or "advanced" argument selects that set
# of checks, no argument runs both sets, anything else is logged as an error.
checkType = "basic"
if len(sys.argv) == 2:
    if sys.argv[1] == "advanced":
        main("advanced")
    elif sys.argv[1] == "basic":
        main("basic")
    else:
        # sys.argv is a list; concatenating it to a str raises a TypeError,
        # so report the offending argument itself.
        printd("Error: Unknown type of test: " + sys.argv[1])
else:
    main("basic")
    main("advanced")

View File

@ -21,7 +21,8 @@ from nose.plugins.attrib import attr
from marvin.cloudstackTestCase import cloudstackTestCase
from marvin.cloudstackAPI import (stopVirtualMachine,
stopRouter,
startRouter)
startRouter,
getRouterHealthCheckResults)
from marvin.lib.utils import (cleanup_resources,
get_process_status)
from marvin.lib.base import (ServiceOffering,
@ -594,6 +595,75 @@ class TestRouterServices(cloudstackTestCase):
return
@attr(tags=["advanced"], required_hardware="true")
def test_04_RouterHealthChecksResults(self):
    """Test advanced zone router list contains health check records
    """
    # Step 1: list the account's routers and ask for the health check
    # results to be included in the response.
    routers = list_routers(
        self.apiclient,
        account=self.account.name,
        domainid=self.account.domainid,
        fetchhealthcheckresults=True
    )
    self.assertEqual(
        isinstance(routers, list), True,
        "Check for list routers response return valid data"
    )
    self.assertNotEqual(len(routers), 0, "Check list router response")

    router = routers[0]
    routerSummary = "Router ID: %s & Router state: %s" % (router.id, router.state)
    self.info(routerSummary)
    self.assertEqual(
        isinstance(router.healthcheckresults, list), True,
        "Router response should contain it's health check result as list"
    )

    # Step 2: fetch the results through the dedicated API for the same router.
    request = getRouterHealthCheckResults.getRouterHealthCheckResultsCmd()
    request.routerid = router.id
    request.performfreshchecks = True  # Perform fresh checks as a newly created router may not have results
    freshResults = self.api_client.getRouterHealthCheckResults(request)
    self.info("Router ID: %s & Router state: %s" % (router.id, router.state))

    self.assertEqual(
        router.id, freshResults.routerid,
        "Router response should contain it's health check result so id should match"
    )
    self.assertEqual(
        isinstance(freshResults.healthchecks, list), True,
        "Router response should contain it's health check result as list"
    )

    # Step 3: both check types and all expected check names must be present.
    self.verifyCheckTypes(freshResults.healthchecks)
    self.verifyCheckNames(freshResults.healthchecks)
def verifyCheckTypes(self, healthChecks):
    """Asserts that the results contain at least one check of type
    "basic" and at least one of type "advanced".

    :param healthChecks: list of result objects exposing `checktype`
    """
    for expectedType in ["basic", "advanced"]:
        typePresent = any(result.checktype == expectedType for result in healthChecks)
        self.assertTrue(
            typePresent,
            "Router should contain health check results info for type: " + expectedType
        )
def verifyCheckNames(self, healthChecks):
    """Asserts that the results contain an entry for each health check
    script name this test expects.

    :param healthChecks: list of result objects exposing `checkname`
    """
    expectedNames = [
        "dns_check.py",
        "dhcp_check.py",
        "haproxy_check.py",
        "disk_space_check.py",
        "iptables_check.py",
        "gateways_check.py",
        "router_version_check.py",
    ]
    for expectedName in expectedNames:
        namePresent = any(result.checkname == expectedName for result in healthChecks)
        self.assertTrue(
            namePresent,
            "Router should contain health check results info for check name: " + expectedName
        )
class TestRouterStopCreatePF(cloudstackTestCase):

View File

@ -48,6 +48,7 @@ function install_cloud_scripts() {
/root/{clearUsageRules.sh,reconfigLB.sh,monitorServices.py} \
/etc/profile.d/cloud.sh /etc/cron.daily/* /etc/cron.hourly/*
chmod +x /root/health_checks/*
chmod -x /etc/systemd/system/*
systemctl daemon-reload

View File

@ -291,6 +291,8 @@ var dictionary = {
"label.action.stop.instance.processing":"Stopping Instance....",
"label.action.stop.router":"Stop Router",
"label.action.stop.router.processing":"Stopping Router....",
"label.action.router.health.checks":"Get health checks result",
"label.perform.fresh.checks":"Perform fresh checks",
"label.action.stop.systemvm":"Stop System VM",
"label.action.stop.systemvm.processing":"Stopping System VM....",
"label.action.take.snapshot":"Take Snapshot",
@ -579,6 +581,7 @@ var dictionary = {
"label.continue":"Continue",
"label.continue.basic.install":"Continue with basic installation",
"label.copying.iso":"Copying ISO",
"label.copy.text": "Copy Text",
"label.corrections.saved":"Corrections saved",
"label.counter":"Counter",
"label.cpu":"CPU",
@ -1505,6 +1508,12 @@ var dictionary = {
"label.root.disk.offering":"Root Disk Offering",
"label.root.disk.size":"Root disk size (GB)",
"label.router.vm.scaled.up":"Router VM Scaled Up",
"label.router.health.checks":"Health Checks",
"label.router.health.check.name":"Check name",
"label.router.health.check.type":"Type",
"label.router.health.check.success":"Success",
"label.router.health.check.last.updated":"Last updated",
"label.router.health.check.details":"Details",
"label.routing":"Routing",
"label.routing.host":"Routing Host",
"label.rule":"Rule",
@ -1974,6 +1983,7 @@ var dictionary = {
"message.action.start.systemvm":"Please confirm that you want to start this system VM.",
"message.action.stop.instance":"Please confirm that you want to stop this instance.",
"message.action.stop.router":"All services provided by this virtual router will be interrupted. Please confirm that you want to stop this router.",
"message.action.router.health.checks":"Health checks result will be fetched from router.",
"message.action.stop.systemvm":"Please confirm that you want to stop this system VM.",
"message.action.take.snapshot":"Please confirm that you want to take a snapshot of this volume.",
"message.action.unmanage.cluster":"Please confirm that you want to unmanage the cluster.",

View File

@ -102,6 +102,64 @@
var $container = $('#cloudStack3-container');
/**
 * Refreshes the UI-wide globals that mirror server-side configuration:
 * pageSize, g_sortKeyIsAscending and g_routerHealthChecksEnabled.
 *
 * Each setting is fetched with a synchronous listConfigurations request, so
 * the globals are up to date when this function returns. A failed request or
 * a missing/unexpected response leaves the corresponding global untouched.
 */
var updateSharedConfigs = function() {
    // Fetches one named configuration synchronously and passes its raw string
    // value to `apply`; does nothing when the request fails or the setting is
    // not the first entry of the response.
    var loadConfig = function(name, apply) {
        $.ajax({
            type: 'GET',
            url: createURL('listConfigurations'),
            data: {name: name},
            dataType: 'json',
            async: false,
            success: function(data, textStatus, xhr) {
                if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) {
                    var config = data.listconfigurationsresponse.configuration[0];
                    if (config && config.name == name) {
                        apply(config.value);
                    }
                }
            },
            error: function(xhr) { // ignore any errors, fallback to the default
            }
        });
    };

    // Update global pagesize for list APIs in UI
    loadConfig('default.ui.page.size', function(value) {
        // Explicit radix; keep the current page size if the value is not a number
        var parsed = parseInt(value, 10);
        if (!isNaN(parsed)) {
            pageSize = parsed;
        }
    });

    // Update global sort key ordering (ascending or not) in UI
    loadConfig('sortkey.algorithm', function(value) {
        g_sortKeyIsAscending = value == 'true';
    });

    // Update global router health checks enabled
    loadConfig('router.health.checks.enabled', function(value) {
        g_routerHealthChecksEnabled = value == 'true';
    });
};
var loginArgs = {
$container: $container,
@ -170,61 +228,25 @@
}
});
// Update global pagesize for list APIs in UI
$.ajax({
type: 'GET',
url: createURL('listConfigurations'),
data: {name: 'default.ui.page.size'},
dataType: 'json',
async: false,
success: function(data, textStatus, xhr) {
if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) {
var config = data.listconfigurationsresponse.configuration[0];
if (config && config.name == 'default.ui.page.size') {
pageSize = parseInt(config.value);
}
}
},
error: function(xhr) { // ignore any errors, fallback to the default
}
});
updateSharedConfigs()
// Update global pagesize for sort key in UI
$.ajax({
type: 'GET',
url: createURL('listConfigurations'),
data: {name: 'sortkey.algorithm'},
dataType: 'json',
async: false,
success: function(data, textStatus, xhr) {
if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) {
var config = data.listconfigurationsresponse.configuration[0];
if (config && config.name == 'sortkey.algorithm') {
g_sortKeyIsAscending = config.value == 'true';
}
}
},
error: function(xhr) { // ignore any errors, fallback to the default
}
});
// Populate IDP list
$.ajax({
type: 'GET',
url: createURL('listIdps'),
dataType: 'json',
async: false,
success: function(data, textStatus, xhr) {
if (data && data.listidpsresponse && data.listidpsresponse.idp) {
var idpList = data.listidpsresponse.idp.sort(function (a, b) {
return a.orgName.localeCompare(b.orgName);
});
g_idpList = idpList;
}
},
error: function(xhr) {
}
});
// Populate IDP list
$.ajax({
type: 'GET',
url: createURL('listIdps'),
dataType: 'json',
async: false,
success: function(data, textStatus, xhr) {
if (data && data.listidpsresponse && data.listidpsresponse.idp) {
var idpList = data.listidpsresponse.idp.sort(function (a, b) {
return a.orgName.localeCompare(b.orgName);
});
g_idpList = idpList;
}
},
error: function(xhr) {
}
});
return userValid ? {
user: {
@ -337,6 +359,7 @@
})
}
});
updateSharedConfigs();
},
error: function(xmlHTTP) {
args.response.error();
@ -377,6 +400,7 @@
g_regionsecondaryenabled = null;
g_loginCmdText = null;
g_allowUserViewAllDomainAccounts = null;
g_routerHealthChecksEnabled = false;
// Remove any cookies
var cookies = document.cookie.split(";");

View File

@ -37,6 +37,7 @@ var g_idpList = null;
var g_appendIdpDomain = false;
var g_sortKeyIsAscending = false;
var g_allowUserViewAllDomainAccounts = false;
var g_routerHealthChecksEnabled = false;
//keyboard keycode
var keycode_Enter = 13;

View File

@ -9886,6 +9886,7 @@
listView: {
id: 'routers',
label: 'label.virtual.appliances',
horizontalOverflow: true,
fields: {
name: {
label: 'label.name'
@ -9914,7 +9915,19 @@
indicator: {
'Running': 'on',
'Stopped': 'off',
'Error': 'off'
'Error': 'off',
'Alert': 'warning'
}
},
healthchecksfailed: {
converter: function (str) {
if (str) return 'Failed'
return 'Passed';
},
label: 'label.health.check',
indicator: {
false: 'on',
true: 'warning'
}
},
requiresupgrade: {
@ -9922,6 +9935,12 @@
converter: cloudStack.converters.toBooleanText
}
},
preFilter: function () {
if (!g_routerHealthChecksEnabled) {
return ['healthchecksfailed']
}
return []
},
dataProvider: function (args) {
var array1 =[];
if (args.filterBy != null) {
@ -9982,44 +10001,47 @@
routers.push(item);
});
/*
* In project view, the first listRouters API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1),
* because in project view, all API calls are appended with projectid=[projectID].
* Therefore, we only call the second listRouters API(with projectid=-1) in non-project view.
*/
if (cloudStack.context && cloudStack.context.projects == null) { //non-project view
/*
* account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API.
* So, remove account parameter(account+domainid) from data2
*/
if ("account" in data2) {
delete data2.account;
}
if ("domainid" in data2) {
delete data2.domainid;
}
$.ajax({
url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"),
data: data2,
async: false,
success: function (json) {
var items = json.listroutersresponse.router ?
json.listroutersresponse.router:[];
$(items).map(function (index, item) {
routers.push(item);
});
}
});
}
args.response.success({
actionFilter: routerActionfilter,
data: $(routers).map(mapRouterType)
});
/*
* In project view, the first listRouters API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1),
* because in project view, all API calls are appended with projectid=[projectID].
* Therefore, we only call the second listRouters API(with projectid=-1) in non-project view.
*/
if (cloudStack.context && cloudStack.context.projects == null) { //non-project view
/*
* account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API.
* So, remove account parameter(account+domainid) from data2
*/
if ("account" in data2) {
delete data2.account;
}
if ("domainid" in data2) {
delete data2.domainid;
}
$.ajax({
url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"),
data: data2,
async: false,
success: function (json) {
var items = json.listroutersresponse.router ?
json.listroutersresponse.router:[];
var items = json.listroutersresponse.router ?
json.listroutersresponse.router:[];
$(items).map(function (index, item) {
routers.push(item);
});
}
});
}
args.response.success({
actionFilter: routerActionfilter,
data: $(routers).map(mapRouterType)
});
}
});
},
detailView: {
name: 'label.virtual.appliance.details',
@ -10542,6 +10564,56 @@
height: 640
}
}
},
healthChecks: {
label: 'label.action.router.health.checks',
createForm: {
title: 'label.action.router.health.checks',
desc: 'message.action.router.health.checks',
fields: {
performfreshchecks: {
label: 'label.perform.fresh.checks',
isBoolean: true
}
}
},
action: function (args) {
if (!g_routerHealthChecksEnabled) {
cloudStack.dialog.notice({
message: 'Router health checks are disabled. Please enable router.health.checks.enabled to execute this action'
})
args.response.success()
return
}
var data = {
'routerid': args.context.routers[0].id,
'performfreshchecks': (args.data.performfreshchecks === 'on')
};
$.ajax({
url: createURL('getRouterHealthCheckResults'),
dataType: 'json',
data: data,
async: true,
success: function (json) {
var healthChecks = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks
var numChecks = healthChecks.length
var failedChecks = 0
$.each(healthChecks, function(idx, check) {
if (!check.success) failedChecks = failedChecks + 1
})
cloudStack.dialog.notice({
message: 'Found ' + numChecks + ' checks for router, with ' + failedChecks + ' failing checks. Please visit router > Health Checks tab to see details'
})
args.response.success();
}
});
},
messages: {
notification: function(args) {
return 'label.action.router.health.checks'
}
}
}
},
tabs: {
@ -10731,6 +10803,78 @@
}
});
}
},
healthCheckResults: {
title: 'label.router.health.checks',
listView: {
id: 'routerHealthCheckResults',
label: 'label.router.health.checks',
hideToolbar: true,
fields: {
checkname: {
label: 'label.router.health.check.name'
},
checktype: {
label: 'label.router.health.check.type'
},
success: {
label: 'label.router.health.check.success',
converter: function (args) {
if (args) {
return _l('True');
} else {
return _l('False');
}
},
indicator: {
true: 'on',
false: 'off'
}
},
lastupdated: {
label: 'label.router.health.check.last.updated'
}
},
actions: {
details: {
label: 'label.router.health.check.details',
action: {
custom: function (args) {
cloudStack.dialog.notice({
message: args.context.routerHealthCheckResults[0].details
})
}
}
}
},
dataProvider: function(args) {
if (!g_routerHealthChecksEnabled) {
cloudStack.dialog.notice({
message: 'Router health checks are disabled. Please enable router.health.checks.enabled to get data'
})
args.response.success({})
return
}
if (args.page > 1) {
// Only one page is supported as it's not list command.
args.response.success({});
return
}
$.ajax({
url: createURL('getRouterHealthCheckResults'),
data: {
'routerid': args.context.routers[0].id
},
success: function (json) {
var hcData = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks
args.response.success({
data: hcData
});
}
});
}
}
}
}
}
@ -22273,6 +22417,7 @@
allowedActions.push("migrate");
allowedActions.push("diagnostics");
allowedActions.push("retrieveDiagnostics");
allowedActions.push("healthChecks");
}
} else if (jsonObj.state == 'Stopped') {
allowedActions.push("start");

View File

@ -168,8 +168,11 @@
} else {
if (needsRefresh) {
var $loading = $('<div>').addClass('loading-overlay');
$listView.prepend($loading);
if ($listView) {
$listView.prepend($loading);
} else {
$instanceRow.closest('.list-view').prepend($loading)
}
}
var actionArgs = {